Data Cleaning Process for Macroeconomic Risk Using Global Indicators

This is the cleaning process for the Macroeconomics risk predictions for Global indicators project

This project is fully documented on https://rpubs.com/ntuli/1384909

library(tidyverse)
library(rstatix)
library(lubridate)
library(rstatix)

Clean inflation dataset

#import dataset

inflation_consumer_price <- read_csv("inflation_consumer_price.csv")


#clean inflation dataset
#remove the fidt two colomns 

inflation_consumer_price <- inflation_consumer_price %>%
  dplyr::slice(-1, -2) %>% #rename the colomns 
  rename("Country" = "Data Source",
         "Country_Code" = "World Development Indicators",
         "Economic_Indicator" = "...3")

#split the indicator colomn into separate colomns

inflation_consumer_price <- inflation_consumer_price %>%
  separate_wider_delim(
    Economic_Indicator, 
    delim = ",",
    names = c("Economic_Indicator", "Indicator_Name", "Indicator_Code", as.character(1960:2024)),
    too_few = 'align_start',
    too_many = "drop"
  )


#RESHAPE AND PIVOTE THE DATA TO LONGER

inflation_consumer_price <- inflation_consumer_price %>%
  pivot_longer( cols = "1960":"2024",
    names_to = "Years",
    values_to = "Values"
  ) %>% #values to numeric
  mutate(Values = as.numeric(Values)) %>% #remove nulls
  drop_na()


#check the countries 

unique(inflation_consumer_price$Country)

##   [1] "Aruba"                                                                    
##   [2] "Africa Eastern and Southern"                                              
##   [3] "Afghanistan"                                                              
##   [4] "Africa Western and Central"                                               
##   [5] "Angola"                                                                   
##   [6] "Albania"                                                                  
##   [7] "Arab World"                                                               
##   [8] "United Arab Emirates"                                                     
##   [9] "Argentina"                                                                
##  [10] "Armenia"                                                                  
##  [11] "Antigua and Barbuda"                                                      
##  [12] "Australia"                                                                
##  [13] "Austria"                                                                  
##  [14] "Azerbaijan"                                                               
##  [15] "Burundi"                                                                  
##  [16] "Belgium"                                                                  
##  [17] "Benin"                                                                    
##  [18] "Burkina Faso"                                                             
##  [19] "Bangladesh"                                                               
##  [20] "Bulgaria"                                                                 
##  [21] "Bahrain"                                                                  
##  [22] "Bahamas, The"                                                             
##  [23] "Bosnia and Herzegovina"                                                   
##  [24] "Belarus"                                                                  
##  [25] "Belize"                                                                   
##  [26] "Bolivia"                                                                  
##  [27] "Brazil"                                                                   
##  [28] "Barbados"                                                                 
##  [29] "Brunei Darussalam"                                                        
##  [30] "Bhutan"                                                                   
##  [31] "Botswana"                                                                 
##  [32] "Central African Republic"                                                 
##  [33] "Canada"                                                                   
##  [34] "Central Europe and the Baltics"                                           
##  [35] "Switzerland"                                                              
##  [36] "Chile"                                                                    
##  [37] "China"                                                                    
##  [38] "Cote d'Ivoire"                                                            
##  [39] "Cameroon"                                                                 
##  [40] "Congo, Dem. Rep."                                                         
##  [41] "Congo, Rep."                                                              
##  [42] "Colombia"                                                                 
##  [43] "Comoros"                                                                  
##  [44] "Cabo Verde"                                                               
##  [45] "Costa Rica"                                                               
##  [46] "Caribbean small states"                                                   
##  [47] "Curacao"                                                                  
##  [48] "Cayman Islands"                                                           
##  [49] "Cyprus"                                                                   
##  [50] "Czechia"                                                                  
##  [51] "Germany"                                                                  
##  [52] "Djibouti"                                                                 
##  [53] "Dominica"                                                                 
##  [54] "Denmark"                                                                  
##  [55] "Dominican Republic"                                                       
##  [56] "Algeria"                                                                  
##  [57] "East Asia & Pacific (excluding high income)"                              
##  [58] "Early-demographic dividend"                                               
##  [59] "East Asia & Pacific"                                                      
##  [60] "Europe & Central Asia (excluding high income)"                            
##  [61] "Europe & Central Asia"                                                    
##  [62] "Ecuador"                                                                  
##  [63] "Egypt, Arab Rep."                                                         
##  [64] "Euro area"                                                                
##  [65] "Spain"                                                                    
##  [66] "Estonia"                                                                  
##  [67] "Ethiopia"                                                                 
##  [68] "European Union"                                                           
##  [69] "Fragile and conflict affected situations"                                 
##  [70] "Finland"                                                                  
##  [71] "Fiji"                                                                     
##  [72] "France"                                                                   
##  [73] "Micronesia, Fed. Sts."                                                    
##  [74] "Gabon"                                                                    
##  [75] "United Kingdom"                                                           
##  [76] "Georgia"                                                                  
##  [77] "Ghana"                                                                    
##  [78] "Guinea"                                                                   
##  [79] "Gambia, The"                                                              
##  [80] "Guinea-Bissau"                                                            
##  [81] "Equatorial Guinea"                                                        
##  [82] "Greece"                                                                   
##  [83] "Grenada"                                                                  
##  [84] "Guatemala"                                                                
##  [85] "Guyana"                                                                   
##  [86] "High income"                                                              
##  [87] "Hong Kong SAR, China"                                                     
##  [88] "Honduras"                                                                 
##  [89] "Heavily indebted poor countries (HIPC)"                                   
##  [90] "Croatia"                                                                  
##  [91] "Haiti"                                                                    
##  [92] "Hungary"                                                                  
##  [93] "IBRD only"                                                                
##  [94] "IDA & IBRD total"                                                         
##  [95] "IDA total"                                                                
##  [96] "IDA blend"                                                                
##  [97] "Indonesia"                                                                
##  [98] "IDA only"                                                                 
##  [99] "India"                                                                    
## [100] "Ireland"                                                                  
## [101] "Iran, Islamic Rep."                                                       
## [102] "Iraq"                                                                     
## [103] "Iceland"                                                                  
## [104] "Israel"                                                                   
## [105] "Italy"                                                                    
## [106] "Jamaica"                                                                  
## [107] "Jordan"                                                                   
## [108] "Japan"                                                                    
## [109] "Kazakhstan"                                                               
## [110] "Kenya"                                                                    
## [111] "Kyrgyz Republic"                                                          
## [112] "Cambodia"                                                                 
## [113] "Kiribati"                                                                 
## [114] "St. Kitts and Nevis"                                                      
## [115] "Korea, Rep."                                                              
## [116] "Kuwait"                                                                   
## [117] "Latin America & Caribbean (excluding high income)"                        
## [118] "Lao PDR"                                                                  
## [119] "Lebanon"                                                                  
## [120] "Liberia"                                                                  
## [121] "Libya"                                                                    
## [122] "St. Lucia"                                                                
## [123] "Latin America & Caribbean"                                                
## [124] "Least developed countries: UN classification"                             
## [125] "Low income"                                                               
## [126] "Sri Lanka"                                                                
## [127] "Lower middle income"                                                      
## [128] "Low & middle income"                                                      
## [129] "Lesotho"                                                                  
## [130] "Late-demographic dividend"                                                
## [131] "Lithuania"                                                                
## [132] "Luxembourg"                                                               
## [133] "Latvia"                                                                   
## [134] "Macao SAR, China"                                                         
## [135] "Morocco"                                                                  
## [136] "Moldova"                                                                  
## [137] "Madagascar"                                                               
## [138] "Maldives"                                                                 
## [139] "Middle East, North Africa, Afghanistan & Pakistan"                        
## [140] "Mexico"                                                                   
## [141] "Middle income"                                                            
## [142] "North Macedonia"                                                          
## [143] "Mali"                                                                     
## [144] "Malta"                                                                    
## [145] "Myanmar"                                                                  
## [146] "Middle East, North Africa, Afghanistan & Pakistan (excluding high income)"
## [147] "Montenegro"                                                               
## [148] "Mongolia"                                                                 
## [149] "Mozambique"                                                               
## [150] "Mauritania"                                                               
## [151] "Mauritius"                                                                
## [152] "Malawi"                                                                   
## [153] "Malaysia"                                                                 
## [154] "North America"                                                            
## [155] "Namibia"                                                                  
## [156] "New Caledonia"                                                            
## [157] "Niger"                                                                    
## [158] "Nigeria"                                                                  
## [159] "Nicaragua"                                                                
## [160] "Netherlands"                                                              
## [161] "Norway"                                                                   
## [162] "Nepal"                                                                    
## [163] "Nauru"                                                                    
## [164] "New Zealand"                                                              
## [165] "OECD members"                                                             
## [166] "Oman"                                                                     
## [167] "Other small states"                                                       
## [168] "Pakistan"                                                                 
## [169] "Panama"                                                                   
## [170] "Peru"                                                                     
## [171] "Philippines"                                                              
## [172] "Palau"                                                                    
## [173] "Papua New Guinea"                                                         
## [174] "Poland"                                                                   
## [175] "Pre-demographic dividend"                                                 
## [176] "Portugal"                                                                 
## [177] "Paraguay"                                                                 
## [178] "West Bank and Gaza"                                                       
## [179] "Pacific island small states"                                              
## [180] "Post-demographic dividend"                                                
## [181] "Qatar"                                                                    
## [182] "Romania"                                                                  
## [183] "Russian Federation"                                                       
## [184] "Rwanda"                                                                   
## [185] "South Asia"                                                               
## [186] "Saudi Arabia"                                                             
## [187] "Sudan"                                                                    
## [188] "Senegal"                                                                  
## [189] "Singapore"                                                                
## [190] "Solomon Islands"                                                          
## [191] "Sierra Leone"                                                             
## [192] "El Salvador"                                                              
## [193] "San Marino"                                                               
## [194] "Serbia"                                                                   
## [195] "Sub-Saharan Africa (excluding high income)"                               
## [196] "South Sudan"                                                              
## [197] "Sub-Saharan Africa"                                                       
## [198] "Small states"                                                             
## [199] "Sao Tome and Principe"                                                    
## [200] "Suriname"                                                                 
## [201] "Slovak Republic"                                                          
## [202] "Slovenia"                                                                 
## [203] "Sweden"                                                                   
## [204] "Eswatini"                                                                 
## [205] "Sint Maarten (Dutch part)"                                                
## [206] "Seychelles"                                                               
## [207] "Syrian Arab Republic"                                                     
## [208] "Chad"                                                                     
## [209] "East Asia & Pacific (IDA & IBRD countries)"                               
## [210] "Europe & Central Asia (IDA & IBRD countries)"                             
## [211] "Togo"                                                                     
## [212] "Thailand"                                                                 
## [213] "Tajikistan"                                                               
## [214] "Latin America & the Caribbean (IDA & IBRD countries)"                     
## [215] "Timor-Leste"                                                              
## [216] "Middle East, North Africa, Afghanistan & Pakistan (IDA & IBRD)"           
## [217] "Tonga"                                                                    
## [218] "South Asia (IDA & IBRD)"                                                  
## [219] "Sub-Saharan Africa (IDA & IBRD countries)"                                
## [220] "Trinidad and Tobago"                                                      
## [221] "Tunisia"                                                                  
## [222] "Turkiye"                                                                  
## [223] "Tuvalu"                                                                   
## [224] "Tanzania"                                                                 
## [225] "Uganda"                                                                   
## [226] "Ukraine"                                                                  
## [227] "Upper middle income"                                                      
## [228] "Uruguay"                                                                  
## [229] "United States"                                                            
## [230] "Uzbekistan"                                                               
## [231] "St. Vincent and the Grenadines"                                           
## [232] "Venezuela, RB"                                                            
## [233] "Viet Nam"                                                                 
## [234] "Vanuatu"                                                                  
## [235] "World"                                                                    
## [236] "Samoa"                                                                    
## [237] "Kosovo"                                                                   
## [238] "Yemen, Rep."                                                              
## [239] "South Africa"                                                             
## [240] "Zambia"                                                                   
## [241] "Zimbabwe"

# remove the unneeded rows 
inflation_consumer_price <- inflation_consumer_price %>%
  mutate(Country = trimws(Country)) %>%
  filter(!grepl(paste0(
    "^Sub-Saharan Africa \\(excluding high income\\)$|",
    "^Sub-Saharan Africa \\(IDA & IBRD countries\\)$|", 
    "^Upper middle income$|",
    "^South Asia \\(IDA & IBRD\\)$|",
    "^Middle East, North Africa, Afghanistan & Pakistan \\(IDA & IBRD\\)$|", 
    "^Latin America & the Caribbean \\(IDA & IBRD countries\\)$|",
    "^Europe & Central Asia \\(IDA & IBRD countries\\)$|", 
    "^East Asia & Pacific \\(IDA & IBRD countries\\)$|",
    "^World$|",
    "^Post-demographic dividend$|",
    "^Small states$|",
    "^Pacific island small states$|",
    "^Pre-demographic dividend$|",
    "^Other small states$|",
    "^OECD members$|",
    "^Middle East, North Africa, Afghanistan & Pakistan \\(excluding high income\\)$|",
    "^Middle East, North Africa, Afghanistan & Pakistan$|",
    "^Middle income$|",
    "^Late-demographic dividend$|", 
    "^Low & middle income$|",
    "^Lower middle income$|",
    "^Low income$|",
    "^Least developed countries: UN classification$|", 
    "^Latin America & Caribbean$|",
    "^Latin America & Caribbean \\(excluding high income\\)$|",
    "^IDA only$|", 
    "^IDA blend$|",
    "^IDA total$|",
    "^IDA & IBRD total$|",
    "^IBRD only$|",
    "^Heavily indebted poor countries \\(HIPC\\)$|", 
    "^High income$|",
    "^Fragile and conflict affected situations$|",
    "^Europe & Central Asia$|",
    "^East Asia & Pacific$|", 
    "^Europe & Central Asia \\(excluding high income\\)$|",
    "^East Asia & Pacific \\(excluding high income\\)$|", 
    "^Caribbean small states$|",
    "^Central Europe and the Baltics$|",
    "^Arab World$|",
    "^Africa Western and Central$|", 
    "^Africa Eastern and Southern$|",
    "^Sub-Saharan Africa$|","^West Bank and Gaza$"
  ), Country, ignore.case = TRUE))

#remove deplicates

inflation_consumer_price <- inflation_consumer_price %>%
  distinct()

#see if theres any duplicates left

infl_dup <- inflation_consumer_price %>%
  duplicated() %>%
  sum() 
print(paste("Number of duplicate rows:",infl_dup))

## [1] "Number of duplicate rows: 0"

clean gdp current

#import dataset

gdp_current_us <- read_csv("gdp_current$us.csv")


gdp_current_us <- gdp_current_us %>%
  dplyr:: slice(-1, -2) %>% #rename the colomns 
  rename("Country" = "Data Source",
         "Country_Code" = "World Development Indicators",
         "Economic_Indicator" = "...3")

#split the indicator colomn into separate colomns

gdp_current_us <- gdp_current_us %>%
  separate_wider_delim(
    Economic_Indicator, 
    delim = ",",
    names = c("Economic_Indicator", "Indicator_Code", as.character(1960:2024)),
    too_few = 'align_start',
    too_many = "drop"
  )


#RESHAPE AND PIVOTE THE DATA TO LONGER

gdp_current_us <- gdp_current_us %>%
  pivot_longer( cols = "1960":"2024",
                names_to = "Years",
                values_to = "Values"
  ) %>% #values to numeric
  mutate(Values = as.numeric(Values)) %>% #remove nulls
  drop_na()


#check the countries 

unique(gdp_current_us$Country)

##   [1] "Aruba"                                                                    
##   [2] "Africa Eastern and Southern"                                              
##   [3] "Afghanistan"                                                              
##   [4] "Africa Western and Central"                                               
##   [5] "Angola"                                                                   
##   [6] "Albania"                                                                  
##   [7] "Andorra"                                                                  
##   [8] "Arab World"                                                               
##   [9] "United Arab Emirates"                                                     
##  [10] "Argentina"                                                                
##  [11] "Armenia"                                                                  
##  [12] "American Samoa"                                                           
##  [13] "Antigua and Barbuda"                                                      
##  [14] "Australia"                                                                
##  [15] "Austria"                                                                  
##  [16] "Azerbaijan"                                                               
##  [17] "Burundi"                                                                  
##  [18] "Belgium"                                                                  
##  [19] "Benin"                                                                    
##  [20] "Burkina Faso"                                                             
##  [21] "Bangladesh"                                                               
##  [22] "Bulgaria"                                                                 
##  [23] "Bahrain"                                                                  
##  [24] "Bahamas, The"                                                             
##  [25] "Bosnia and Herzegovina"                                                   
##  [26] "Belarus"                                                                  
##  [27] "Belize"                                                                   
##  [28] "Bermuda"                                                                  
##  [29] "Bolivia"                                                                  
##  [30] "Brazil"                                                                   
##  [31] "Barbados"                                                                 
##  [32] "Brunei Darussalam"                                                        
##  [33] "Bhutan"                                                                   
##  [34] "Botswana"                                                                 
##  [35] "Central African Republic"                                                 
##  [36] "Canada"                                                                   
##  [37] "Central Europe and the Baltics"                                           
##  [38] "Switzerland"                                                              
##  [39] "Channel Islands"                                                          
##  [40] "Chile"                                                                    
##  [41] "China"                                                                    
##  [42] "Cote d'Ivoire"                                                            
##  [43] "Cameroon"                                                                 
##  [44] "Congo, Dem. Rep."                                                         
##  [45] "Congo, Rep."                                                              
##  [46] "Colombia"                                                                 
##  [47] "Comoros"                                                                  
##  [48] "Cabo Verde"                                                               
##  [49] "Costa Rica"                                                               
##  [50] "Caribbean small states"                                                   
##  [51] "Cuba"                                                                     
##  [52] "Curacao"                                                                  
##  [53] "Cayman Islands"                                                           
##  [54] "Cyprus"                                                                   
##  [55] "Czechia"                                                                  
##  [56] "Germany"                                                                  
##  [57] "Djibouti"                                                                 
##  [58] "Dominica"                                                                 
##  [59] "Denmark"                                                                  
##  [60] "Dominican Republic"                                                       
##  [61] "Algeria"                                                                  
##  [62] "East Asia & Pacific (excluding high income)"                              
##  [63] "Early-demographic dividend"                                               
##  [64] "East Asia & Pacific"                                                      
##  [65] "Europe & Central Asia (excluding high income)"                            
##  [66] "Europe & Central Asia"                                                    
##  [67] "Ecuador"                                                                  
##  [68] "Egypt, Arab Rep."                                                         
##  [69] "Euro area"                                                                
##  [70] "Eritrea"                                                                  
##  [71] "Spain"                                                                    
##  [72] "Estonia"                                                                  
##  [73] "Ethiopia"                                                                 
##  [74] "European Union"                                                           
##  [75] "Fragile and conflict affected situations"                                 
##  [76] "Finland"                                                                  
##  [77] "Fiji"                                                                     
##  [78] "France"                                                                   
##  [79] "Faroe Islands"                                                            
##  [80] "Micronesia, Fed. Sts."                                                    
##  [81] "Gabon"                                                                    
##  [82] "United Kingdom"                                                           
##  [83] "Georgia"                                                                  
##  [84] "Ghana"                                                                    
##  [85] "Guinea"                                                                   
##  [86] "Gambia, The"                                                              
##  [87] "Guinea-Bissau"                                                            
##  [88] "Equatorial Guinea"                                                        
##  [89] "Greece"                                                                   
##  [90] "Grenada"                                                                  
##  [91] "Greenland"                                                                
##  [92] "Guatemala"                                                                
##  [93] "Guam"                                                                     
##  [94] "Guyana"                                                                   
##  [95] "High income"                                                              
##  [96] "Hong Kong SAR, China"                                                     
##  [97] "Honduras"                                                                 
##  [98] "Heavily indebted poor countries (HIPC)"                                   
##  [99] "Croatia"                                                                  
## [100] "Haiti"                                                                    
## [101] "Hungary"                                                                  
## [102] "IBRD only"                                                                
## [103] "IDA & IBRD total"                                                         
## [104] "IDA total"                                                                
## [105] "IDA blend"                                                                
## [106] "Indonesia"                                                                
## [107] "IDA only"                                                                 
## [108] "Isle of Man"                                                              
## [109] "India"                                                                    
## [110] "Ireland"                                                                  
## [111] "Iran, Islamic Rep."                                                       
## [112] "Iraq"                                                                     
## [113] "Iceland"                                                                  
## [114] "Israel"                                                                   
## [115] "Italy"                                                                    
## [116] "Jamaica"                                                                  
## [117] "Jordan"                                                                   
## [118] "Japan"                                                                    
## [119] "Kazakhstan"                                                               
## [120] "Kenya"                                                                    
## [121] "Kyrgyz Republic"                                                          
## [122] "Cambodia"                                                                 
## [123] "Kiribati"                                                                 
## [124] "St. Kitts and Nevis"                                                      
## [125] "Korea, Rep."                                                              
## [126] "Kuwait"                                                                   
## [127] "Latin America & Caribbean (excluding high income)"                        
## [128] "Lao PDR"                                                                  
## [129] "Lebanon"                                                                  
## [130] "Liberia"                                                                  
## [131] "Libya"                                                                    
## [132] "St. Lucia"                                                                
## [133] "Latin America & Caribbean"                                                
## [134] "Least developed countries: UN classification"                             
## [135] "Low income"                                                               
## [136] "Liechtenstein"                                                            
## [137] "Sri Lanka"                                                                
## [138] "Lower middle income"                                                      
## [139] "Low & middle income"                                                      
## [140] "Lesotho"                                                                  
## [141] "Late-demographic dividend"                                                
## [142] "Lithuania"                                                                
## [143] "Luxembourg"                                                               
## [144] "Latvia"                                                                   
## [145] "Macao SAR, China"                                                         
## [146] "St. Martin (French part)"                                                 
## [147] "Morocco"                                                                  
## [148] "Monaco"                                                                   
## [149] "Moldova"                                                                  
## [150] "Madagascar"                                                               
## [151] "Maldives"                                                                 
## [152] "Middle East, North Africa, Afghanistan & Pakistan"                        
## [153] "Mexico"                                                                   
## [154] "Marshall Islands"                                                         
## [155] "Middle income"                                                            
## [156] "North Macedonia"                                                          
## [157] "Mali"                                                                     
## [158] "Malta"                                                                    
## [159] "Myanmar"                                                                  
## [160] "Middle East, North Africa, Afghanistan & Pakistan (excluding high income)"
## [161] "Montenegro"                                                               
## [162] "Mongolia"                                                                 
## [163] "Northern Mariana Islands"                                                 
## [164] "Mozambique"                                                               
## [165] "Mauritania"                                                               
## [166] "Mauritius"                                                                
## [167] "Malawi"                                                                   
## [168] "Malaysia"                                                                 
## [169] "North America"                                                            
## [170] "Namibia"                                                                  
## [171] "New Caledonia"                                                            
## [172] "Niger"                                                                    
## [173] "Nigeria"                                                                  
## [174] "Nicaragua"                                                                
## [175] "Netherlands"                                                              
## [176] "Norway"                                                                   
## [177] "Nepal"                                                                    
## [178] "Nauru"                                                                    
## [179] "New Zealand"                                                              
## [180] "OECD members"                                                             
## [181] "Oman"                                                                     
## [182] "Other small states"                                                       
## [183] "Pakistan"                                                                 
## [184] "Panama"                                                                   
## [185] "Peru"                                                                     
## [186] "Philippines"                                                              
## [187] "Palau"                                                                    
## [188] "Papua New Guinea"                                                         
## [189] "Poland"                                                                   
## [190] "Pre-demographic dividend"                                                 
## [191] "Puerto Rico (US)"                                                         
## [192] "Portugal"                                                                 
## [193] "Paraguay"                                                                 
## [194] "West Bank and Gaza"                                                       
## [195] "Pacific island small states"                                              
## [196] "Post-demographic dividend"                                                
## [197] "French Polynesia"                                                         
## [198] "Qatar"                                                                    
## [199] "Romania"                                                                  
## [200] "Russian Federation"                                                       
## [201] "Rwanda"                                                                   
## [202] "South Asia"                                                               
## [203] "Saudi Arabia"                                                             
## [204] "Sudan"                                                                    
## [205] "Senegal"                                                                  
## [206] "Singapore"                                                                
## [207] "Solomon Islands"                                                          
## [208] "Sierra Leone"                                                             
## [209] "El Salvador"                                                              
## [210] "San Marino"                                                               
## [211] "Somalia, Fed. Rep."                                                       
## [212] "Serbia"                                                                   
## [213] "Sub-Saharan Africa (excluding high income)"                               
## [214] "South Sudan"                                                              
## [215] "Sub-Saharan Africa"                                                       
## [216] "Small states"                                                             
## [217] "Sao Tome and Principe"                                                    
## [218] "Suriname"                                                                 
## [219] "Slovak Republic"                                                          
## [220] "Slovenia"                                                                 
## [221] "Sweden"                                                                   
## [222] "Eswatini"                                                                 
## [223] "Sint Maarten (Dutch part)"                                                
## [224] "Seychelles"                                                               
## [225] "Syrian Arab Republic"                                                     
## [226] "Turks and Caicos Islands"                                                 
## [227] "Chad"                                                                     
## [228] "East Asia & Pacific (IDA & IBRD countries)"                               
## [229] "Europe & Central Asia (IDA & IBRD countries)"                             
## [230] "Togo"                                                                     
## [231] "Thailand"                                                                 
## [232] "Tajikistan"                                                               
## [233] "Turkmenistan"                                                             
## [234] "Latin America & the Caribbean (IDA & IBRD countries)"                     
## [235] "Timor-Leste"                                                              
## [236] "Middle East, North Africa, Afghanistan & Pakistan (IDA & IBRD)"           
## [237] "Tonga"                                                                    
## [238] "South Asia (IDA & IBRD)"                                                  
## [239] "Sub-Saharan Africa (IDA & IBRD countries)"                                
## [240] "Trinidad and Tobago"                                                      
## [241] "Tunisia"                                                                  
## [242] "Turkiye"                                                                  
## [243] "Tuvalu"                                                                   
## [244] "Tanzania"                                                                 
## [245] "Uganda"                                                                   
## [246] "Ukraine"                                                                  
## [247] "Upper middle income"                                                      
## [248] "Uruguay"                                                                  
## [249] "United States"                                                            
## [250] "Uzbekistan"                                                               
## [251] "St. Vincent and the Grenadines"                                           
## [252] "Venezuela, RB"                                                            
## [253] "Virgin Islands (U.S.)"                                                    
## [254] "Viet Nam"                                                                 
## [255] "Vanuatu"                                                                  
## [256] "World"                                                                    
## [257] "Samoa"                                                                    
## [258] "Kosovo"                                                                   
## [259] "Yemen, Rep."                                                              
## [260] "South Africa"                                                             
## [261] "Zambia"                                                                   
## [262] "Zimbabwe"

# remove the unneeded rows 
gdp_current_us <- gdp_current_us %>%
  mutate(Country = trimws(Country)) %>%
  filter(!grepl(paste0(
    "^Sub-Saharan Africa \\(excluding high income\\)$|",
    "^Sub-Saharan Africa \\(IDA & IBRD countries\\)$|", 
    "^Upper middle income$|",
    "^South Asia \\(IDA & IBRD\\)$|",
    "^Middle East, North Africa, Afghanistan & Pakistan \\(IDA & IBRD\\)$|", 
    "^Latin America & the Caribbean \\(IDA & IBRD countries\\)$|",
    "^Europe & Central Asia \\(IDA & IBRD countries\\)$|", 
    "^East Asia & Pacific \\(IDA & IBRD countries\\)$|",
    "^World$|",
    "^Post-demographic dividend$|",
    "^Small states$|",
    "^Pacific island small states$|",
    "^Pre-demographic dividend$|",
    "^Other small states$|",
    "^OECD members$|",
    "^Middle East, North Africa, Afghanistan & Pakistan \\(excluding high income\\)$|",
    "^Middle East, North Africa, Afghanistan & Pakistan$|",
    "^Middle income$|",
    "^Late-demographic dividend$|", 
    "^Low & middle income$|",
    "^Lower middle income$|",
    "^Low income$|",
    "^Least developed countries: UN classification$|", 
    "^Latin America & Caribbean$|",
    "^Latin America & Caribbean \\(excluding high income\\)$|",
    "^IDA only$|", 
    "^IDA blend$|",
    "^IDA total$|",
    "^IDA & IBRD total$|",
    "^IBRD only$|",
    "^Heavily indebted poor countries \\(HIPC\\)$|", 
    "^High income$|",
    "^Fragile and conflict affected situations$|",
    "^Europe & Central Asia$|",
    "^East Asia & Pacific$|", 
    "^Europe & Central Asia \\(excluding high income\\)$|",
    "^East Asia & Pacific \\(excluding high income\\)$|", 
    "^Caribbean small states$|",
    "^Central Europe and the Baltics$|",
    "^Arab World$|",
    "^Africa Western and Central$|", 
    "^Africa Eastern and Southern$|",
    "^Sub-Saharan Africa$|","^West Bank and Gaza$"
  ), Country, ignore.case = TRUE))


#remove deplicates

gdp_current_us <- gdp_current_us %>%
  distinct()

#see if theres any duplicates left

gdp_current_dup <- gdp_current_us %>%
  duplicated() %>%
  sum() 
print(paste("Number of duplicate rows:",gdp_current_dup))

## [1] "Number of duplicate rows: 0"

clean gdp per capita

gdp_per_capita <- read_csv("gdp_per_capita.csv")



gdp_per_capita <- gdp_per_capita %>%
  dplyr::slice(-1, -2) %>% #rename the colomns 
  rename("Country" = "Data Source",
         "Country_Code" = "World Development Indicators",
         "Economic_Indicator" = "...3")

#split the indicator colomn into separate colomns

gdp_per_capita <- gdp_per_capita %>%
  separate_wider_delim(
    Economic_Indicator, 
    delim = ",",
    names = c("Economic_Indicator", "Indicator_Code", as.character(1960:2024)),
    too_few = 'align_start',
    too_many = "drop"
  )


#RESHAPE AND PIVOTE THE DATA TO LONGER

gdp_per_capita <- gdp_per_capita %>%
  pivot_longer( cols = "1960":"2024",
                names_to = "Years",
                values_to = "Values"
  ) %>% #values to numeric
  mutate(Values = as.numeric(Values)) %>% #remove nulls
  drop_na()


#check the countries 

unique(gdp_per_capita$Country)

##   [1] "Aruba"                                                                    
##   [2] "Africa Eastern and Southern"                                              
##   [3] "Afghanistan"                                                              
##   [4] "Africa Western and Central"                                               
##   [5] "Angola"                                                                   
##   [6] "Albania"                                                                  
##   [7] "Andorra"                                                                  
##   [8] "Arab World"                                                               
##   [9] "United Arab Emirates"                                                     
##  [10] "Argentina"                                                                
##  [11] "Armenia"                                                                  
##  [12] "American Samoa"                                                           
##  [13] "Antigua and Barbuda"                                                      
##  [14] "Australia"                                                                
##  [15] "Austria"                                                                  
##  [16] "Azerbaijan"                                                               
##  [17] "Burundi"                                                                  
##  [18] "Belgium"                                                                  
##  [19] "Benin"                                                                    
##  [20] "Burkina Faso"                                                             
##  [21] "Bangladesh"                                                               
##  [22] "Bulgaria"                                                                 
##  [23] "Bahrain"                                                                  
##  [24] "Bahamas, The"                                                             
##  [25] "Bosnia and Herzegovina"                                                   
##  [26] "Belarus"                                                                  
##  [27] "Belize"                                                                   
##  [28] "Bermuda"                                                                  
##  [29] "Bolivia"                                                                  
##  [30] "Brazil"                                                                   
##  [31] "Barbados"                                                                 
##  [32] "Brunei Darussalam"                                                        
##  [33] "Bhutan"                                                                   
##  [34] "Botswana"                                                                 
##  [35] "Central African Republic"                                                 
##  [36] "Canada"                                                                   
##  [37] "Central Europe and the Baltics"                                           
##  [38] "Switzerland"                                                              
##  [39] "Channel Islands"                                                          
##  [40] "Chile"                                                                    
##  [41] "China"                                                                    
##  [42] "Cote d'Ivoire"                                                            
##  [43] "Cameroon"                                                                 
##  [44] "Congo, Dem. Rep."                                                         
##  [45] "Congo, Rep."                                                              
##  [46] "Colombia"                                                                 
##  [47] "Comoros"                                                                  
##  [48] "Cabo Verde"                                                               
##  [49] "Costa Rica"                                                               
##  [50] "Caribbean small states"                                                   
##  [51] "Cuba"                                                                     
##  [52] "Curacao"                                                                  
##  [53] "Cayman Islands"                                                           
##  [54] "Cyprus"                                                                   
##  [55] "Czechia"                                                                  
##  [56] "Germany"                                                                  
##  [57] "Djibouti"                                                                 
##  [58] "Dominica"                                                                 
##  [59] "Denmark"                                                                  
##  [60] "Dominican Republic"                                                       
##  [61] "Algeria"                                                                  
##  [62] "East Asia & Pacific (excluding high income)"                              
##  [63] "Early-demographic dividend"                                               
##  [64] "East Asia & Pacific"                                                      
##  [65] "Europe & Central Asia (excluding high income)"                            
##  [66] "Europe & Central Asia"                                                    
##  [67] "Ecuador"                                                                  
##  [68] "Egypt, Arab Rep."                                                         
##  [69] "Euro area"                                                                
##  [70] "Eritrea"                                                                  
##  [71] "Spain"                                                                    
##  [72] "Estonia"                                                                  
##  [73] "Ethiopia"                                                                 
##  [74] "European Union"                                                           
##  [75] "Fragile and conflict affected situations"                                 
##  [76] "Finland"                                                                  
##  [77] "Fiji"                                                                     
##  [78] "France"                                                                   
##  [79] "Faroe Islands"                                                            
##  [80] "Micronesia, Fed. Sts."                                                    
##  [81] "Gabon"                                                                    
##  [82] "United Kingdom"                                                           
##  [83] "Georgia"                                                                  
##  [84] "Ghana"                                                                    
##  [85] "Guinea"                                                                   
##  [86] "Gambia, The"                                                              
##  [87] "Guinea-Bissau"                                                            
##  [88] "Equatorial Guinea"                                                        
##  [89] "Greece"                                                                   
##  [90] "Grenada"                                                                  
##  [91] "Greenland"                                                                
##  [92] "Guatemala"                                                                
##  [93] "Guam"                                                                     
##  [94] "Guyana"                                                                   
##  [95] "High income"                                                              
##  [96] "Hong Kong SAR, China"                                                     
##  [97] "Honduras"                                                                 
##  [98] "Heavily indebted poor countries (HIPC)"                                   
##  [99] "Croatia"                                                                  
## [100] "Haiti"                                                                    
## [101] "Hungary"                                                                  
## [102] "IBRD only"                                                                
## [103] "IDA & IBRD total"                                                         
## [104] "IDA total"                                                                
## [105] "IDA blend"                                                                
## [106] "Indonesia"                                                                
## [107] "IDA only"                                                                 
## [108] "Isle of Man"                                                              
## [109] "India"                                                                    
## [110] "Ireland"                                                                  
## [111] "Iran, Islamic Rep."                                                       
## [112] "Iraq"                                                                     
## [113] "Iceland"                                                                  
## [114] "Israel"                                                                   
## [115] "Italy"                                                                    
## [116] "Jamaica"                                                                  
## [117] "Jordan"                                                                   
## [118] "Japan"                                                                    
## [119] "Kazakhstan"                                                               
## [120] "Kenya"                                                                    
## [121] "Kyrgyz Republic"                                                          
## [122] "Cambodia"                                                                 
## [123] "Kiribati"                                                                 
## [124] "St. Kitts and Nevis"                                                      
## [125] "Korea, Rep."                                                              
## [126] "Kuwait"                                                                   
## [127] "Latin America & Caribbean (excluding high income)"                        
## [128] "Lao PDR"                                                                  
## [129] "Lebanon"                                                                  
## [130] "Liberia"                                                                  
## [131] "Libya"                                                                    
## [132] "St. Lucia"                                                                
## [133] "Latin America & Caribbean"                                                
## [134] "Least developed countries: UN classification"                             
## [135] "Low income"                                                               
## [136] "Liechtenstein"                                                            
## [137] "Sri Lanka"                                                                
## [138] "Lower middle income"                                                      
## [139] "Low & middle income"                                                      
## [140] "Lesotho"                                                                  
## [141] "Late-demographic dividend"                                                
## [142] "Lithuania"                                                                
## [143] "Luxembourg"                                                               
## [144] "Latvia"                                                                   
## [145] "Macao SAR, China"                                                         
## [146] "St. Martin (French part)"                                                 
## [147] "Morocco"                                                                  
## [148] "Monaco"                                                                   
## [149] "Moldova"                                                                  
## [150] "Madagascar"                                                               
## [151] "Maldives"                                                                 
## [152] "Middle East, North Africa, Afghanistan & Pakistan"                        
## [153] "Mexico"                                                                   
## [154] "Marshall Islands"                                                         
## [155] "Middle income"                                                            
## [156] "North Macedonia"                                                          
## [157] "Mali"                                                                     
## [158] "Malta"                                                                    
## [159] "Myanmar"                                                                  
## [160] "Middle East, North Africa, Afghanistan & Pakistan (excluding high income)"
## [161] "Montenegro"                                                               
## [162] "Mongolia"                                                                 
## [163] "Northern Mariana Islands"                                                 
## [164] "Mozambique"                                                               
## [165] "Mauritania"                                                               
## [166] "Mauritius"                                                                
## [167] "Malawi"                                                                   
## [168] "Malaysia"                                                                 
## [169] "North America"                                                            
## [170] "Namibia"                                                                  
## [171] "New Caledonia"                                                            
## [172] "Niger"                                                                    
## [173] "Nigeria"                                                                  
## [174] "Nicaragua"                                                                
## [175] "Netherlands"                                                              
## [176] "Norway"                                                                   
## [177] "Nepal"                                                                    
## [178] "Nauru"                                                                    
## [179] "New Zealand"                                                              
## [180] "OECD members"                                                             
## [181] "Oman"                                                                     
## [182] "Other small states"                                                       
## [183] "Pakistan"                                                                 
## [184] "Panama"                                                                   
## [185] "Peru"                                                                     
## [186] "Philippines"                                                              
## [187] "Palau"                                                                    
## [188] "Papua New Guinea"                                                         
## [189] "Poland"                                                                   
## [190] "Pre-demographic dividend"                                                 
## [191] "Puerto Rico (US)"                                                         
## [192] "Portugal"                                                                 
## [193] "Paraguay"                                                                 
## [194] "West Bank and Gaza"                                                       
## [195] "Pacific island small states"                                              
## [196] "Post-demographic dividend"                                                
## [197] "French Polynesia"                                                         
## [198] "Qatar"                                                                    
## [199] "Romania"                                                                  
## [200] "Russian Federation"                                                       
## [201] "Rwanda"                                                                   
## [202] "South Asia"                                                               
## [203] "Saudi Arabia"                                                             
## [204] "Sudan"                                                                    
## [205] "Senegal"                                                                  
## [206] "Singapore"                                                                
## [207] "Solomon Islands"                                                          
## [208] "Sierra Leone"                                                             
## [209] "El Salvador"                                                              
## [210] "San Marino"                                                               
## [211] "Somalia, Fed. Rep."                                                       
## [212] "Serbia"                                                                   
## [213] "Sub-Saharan Africa (excluding high income)"                               
## [214] "South Sudan"                                                              
## [215] "Sub-Saharan Africa"                                                       
## [216] "Small states"                                                             
## [217] "Sao Tome and Principe"                                                    
## [218] "Suriname"                                                                 
## [219] "Slovak Republic"                                                          
## [220] "Slovenia"                                                                 
## [221] "Sweden"                                                                   
## [222] "Eswatini"                                                                 
## [223] "Sint Maarten (Dutch part)"                                                
## [224] "Seychelles"                                                               
## [225] "Syrian Arab Republic"                                                     
## [226] "Turks and Caicos Islands"                                                 
## [227] "Chad"                                                                     
## [228] "East Asia & Pacific (IDA & IBRD countries)"                               
## [229] "Europe & Central Asia (IDA & IBRD countries)"                             
## [230] "Togo"                                                                     
## [231] "Thailand"                                                                 
## [232] "Tajikistan"                                                               
## [233] "Turkmenistan"                                                             
## [234] "Latin America & the Caribbean (IDA & IBRD countries)"                     
## [235] "Timor-Leste"                                                              
## [236] "Middle East, North Africa, Afghanistan & Pakistan (IDA & IBRD)"           
## [237] "Tonga"                                                                    
## [238] "South Asia (IDA & IBRD)"                                                  
## [239] "Sub-Saharan Africa (IDA & IBRD countries)"                                
## [240] "Trinidad and Tobago"                                                      
## [241] "Tunisia"                                                                  
## [242] "Turkiye"                                                                  
## [243] "Tuvalu"                                                                   
## [244] "Tanzania"                                                                 
## [245] "Uganda"                                                                   
## [246] "Ukraine"                                                                  
## [247] "Upper middle income"                                                      
## [248] "Uruguay"                                                                  
## [249] "United States"                                                            
## [250] "Uzbekistan"                                                               
## [251] "St. Vincent and the Grenadines"                                           
## [252] "Venezuela, RB"                                                            
## [253] "Virgin Islands (U.S.)"                                                    
## [254] "Viet Nam"                                                                 
## [255] "Vanuatu"                                                                  
## [256] "World"                                                                    
## [257] "Samoa"                                                                    
## [258] "Kosovo"                                                                   
## [259] "Yemen, Rep."                                                              
## [260] "South Africa"                                                             
## [261] "Zambia"                                                                   
## [262] "Zimbabwe"

# remove the unneeded rows 
gdp_per_capita <- gdp_per_capita %>%
  mutate(Country = trimws(Country)) %>%
  filter(!grepl(paste0(
    "^Sub-Saharan Africa \\(excluding high income\\)$|",
    "^Sub-Saharan Africa \\(IDA & IBRD countries\\)$|", 
    "^Upper middle income$|",
    "^South Asia \\(IDA & IBRD\\)$|",
    "^Middle East, North Africa, Afghanistan & Pakistan \\(IDA & IBRD\\)$|", 
    "^Latin America & the Caribbean \\(IDA & IBRD countries\\)$|",
    "^Europe & Central Asia \\(IDA & IBRD countries\\)$|", 
    "^East Asia & Pacific \\(IDA & IBRD countries\\)$|",
    "^World$|",
    "^Post-demographic dividend$|",
    "^Small states$|",
    "^Pacific island small states$|",
    "^Pre-demographic dividend$|",
    "^Other small states$|",
    "^OECD members$|",
    "^Middle East, North Africa, Afghanistan & Pakistan \\(excluding high income\\)$|",
    "^Middle East, North Africa, Afghanistan & Pakistan$|",
    "^Middle income$|",
    "^Late-demographic dividend$|", 
    "^Low & middle income$|",
    "^Lower middle income$|",
    "^Low income$|",
    "^Least developed countries: UN classification$|", 
    "^Latin America & Caribbean$|",
    "^Latin America & Caribbean \\(excluding high income\\)$|",
    "^IDA only$|", 
    "^IDA blend$|",
    "^IDA total$|",
    "^IDA & IBRD total$|",
    "^IBRD only$|",
    "^Heavily indebted poor countries \\(HIPC\\)$|", 
    "^High income$|",
    "^Fragile and conflict affected situations$|",
    "^Europe & Central Asia$|",
    "^East Asia & Pacific$|", 
    "^Europe & Central Asia \\(excluding high income\\)$|",
    "^East Asia & Pacific \\(excluding high income\\)$|", 
    "^Caribbean small states$|",
    "^Central Europe and the Baltics$|",
    "^Arab World$|",
    "^Africa Western and Central$|", 
    "^Africa Eastern and Southern$|",
    "^Sub-Saharan Africa$|","^West Bank and Gaza$"
  ), Country, ignore.case = TRUE))


#remove deplicates

gdp_per_capita <- gdp_per_capita %>%
  distinct()

#see if theres any duplicates left

gdp_per_capita_dup <- gdp_per_capita %>%
  duplicated() %>%
  sum() 
print(paste("Number of duplicate rows:",gdp_per_capita_dup))

## [1] "Number of duplicate rows: 0"

CLEAN PRIVATE SECTOR

private_sector_credit <- read_csv("private_sector_credit.csv")


private_sector_credit <- private_sector_credit %>%
  dplyr::slice(-1, -2) %>% #rename the colomns 
  rename("Country" = "Data Source",
         "Country_Code" = "World Development Indicators",
         "Economic_Indicator" = "...3")

#split the indicator colomn into separate colomns

private_sector_credit <- private_sector_credit %>%
  separate_wider_delim(
    Economic_Indicator, 
    delim = ",",
    names = c("Economic_Indicator", "Indicator_Code", as.character(1960:2024)),
    too_few = 'align_start',
    too_many = "drop"
  )


#RESHAPE AND PIVOTE THE DATA TO LONGER

private_sector_credit <- private_sector_credit %>%
  pivot_longer( cols = "1960":"2024",
                names_to = "Years",
                values_to = "Values"
  ) %>% #values to numeric
  mutate(Values = as.numeric(Values)) %>% #remove nulls
  drop_na()


#check the countries 

unique(private_sector_credit$Country)

##   [1] "Aruba"                                                                    
##   [2] "Africa Eastern and Southern"                                              
##   [3] "Afghanistan"                                                              
##   [4] "Africa Western and Central"                                               
##   [5] "Angola"                                                                   
##   [6] "Albania"                                                                  
##   [7] "Arab World"                                                               
##   [8] "United Arab Emirates"                                                     
##   [9] "Argentina"                                                                
##  [10] "Armenia"                                                                  
##  [11] "Antigua and Barbuda"                                                      
##  [12] "Australia"                                                                
##  [13] "Austria"                                                                  
##  [14] "Azerbaijan"                                                               
##  [15] "Burundi"                                                                  
##  [16] "Belgium"                                                                  
##  [17] "Benin"                                                                    
##  [18] "Burkina Faso"                                                             
##  [19] "Bangladesh"                                                               
##  [20] "Bulgaria"                                                                 
##  [21] "Bahrain"                                                                  
##  [22] "Bahamas, The"                                                             
##  [23] "Bosnia and Herzegovina"                                                   
##  [24] "Belarus"                                                                  
##  [25] "Belize"                                                                   
##  [26] "Bolivia"                                                                  
##  [27] "Brazil"                                                                   
##  [28] "Barbados"                                                                 
##  [29] "Brunei Darussalam"                                                        
##  [30] "Bhutan"                                                                   
##  [31] "Botswana"                                                                 
##  [32] "Central African Republic"                                                 
##  [33] "Canada"                                                                   
##  [34] "Central Europe and the Baltics"                                           
##  [35] "Switzerland"                                                              
##  [36] "Chile"                                                                    
##  [37] "China"                                                                    
##  [38] "Cote d'Ivoire"                                                            
##  [39] "Cameroon"                                                                 
##  [40] "Congo, Dem. Rep."                                                         
##  [41] "Congo, Rep."                                                              
##  [42] "Colombia"                                                                 
##  [43] "Comoros"                                                                  
##  [44] "Cabo Verde"                                                               
##  [45] "Costa Rica"                                                               
##  [46] "Caribbean small states"                                                   
##  [47] "Cyprus"                                                                   
##  [48] "Czechia"                                                                  
##  [49] "Germany"                                                                  
##  [50] "Djibouti"                                                                 
##  [51] "Dominica"                                                                 
##  [52] "Denmark"                                                                  
##  [53] "Dominican Republic"                                                       
##  [54] "Algeria"                                                                  
##  [55] "East Asia & Pacific (excluding high income)"                              
##  [56] "Early-demographic dividend"                                               
##  [57] "East Asia & Pacific"                                                      
##  [58] "Europe & Central Asia (excluding high income)"                            
##  [59] "Europe & Central Asia"                                                    
##  [60] "Ecuador"                                                                  
##  [61] "Egypt, Arab Rep."                                                         
##  [62] "Euro area"                                                                
##  [63] "Eritrea"                                                                  
##  [64] "Spain"                                                                    
##  [65] "Estonia"                                                                  
##  [66] "Ethiopia"                                                                 
##  [67] "European Union"                                                           
##  [68] "Fragile and conflict affected situations"                                 
##  [69] "Finland"                                                                  
##  [70] "Fiji"                                                                     
##  [71] "France"                                                                   
##  [72] "Micronesia, Fed. Sts."                                                    
##  [73] "Gabon"                                                                    
##  [74] "United Kingdom"                                                           
##  [75] "Georgia"                                                                  
##  [76] "Ghana"                                                                    
##  [77] "Guinea"                                                                   
##  [78] "Gambia, The"                                                              
##  [79] "Guinea-Bissau"                                                            
##  [80] "Equatorial Guinea"                                                        
##  [81] "Greece"                                                                   
##  [82] "Grenada"                                                                  
##  [83] "Guatemala"                                                                
##  [84] "Guyana"                                                                   
##  [85] "High income"                                                              
##  [86] "Hong Kong SAR, China"                                                     
##  [87] "Honduras"                                                                 
##  [88] "Heavily indebted poor countries (HIPC)"                                   
##  [89] "Croatia"                                                                  
##  [90] "Haiti"                                                                    
##  [91] "Hungary"                                                                  
##  [92] "IBRD only"                                                                
##  [93] "IDA & IBRD total"                                                         
##  [94] "IDA total"                                                                
##  [95] "IDA blend"                                                                
##  [96] "Indonesia"                                                                
##  [97] "IDA only"                                                                 
##  [98] "India"                                                                    
##  [99] "Ireland"                                                                  
## [100] "Iran, Islamic Rep."                                                       
## [101] "Iraq"                                                                     
## [102] "Iceland"                                                                  
## [103] "Israel"                                                                   
## [104] "Italy"                                                                    
## [105] "Jamaica"                                                                  
## [106] "Jordan"                                                                   
## [107] "Japan"                                                                    
## [108] "Kazakhstan"                                                               
## [109] "Kenya"                                                                    
## [110] "Kyrgyz Republic"                                                          
## [111] "Cambodia"                                                                 
## [112] "St. Kitts and Nevis"                                                      
## [113] "Korea, Rep."                                                              
## [114] "Kuwait"                                                                   
## [115] "Latin America & Caribbean (excluding high income)"                        
## [116] "Lao PDR"                                                                  
## [117] "Lebanon"                                                                  
## [118] "Liberia"                                                                  
## [119] "Libya"                                                                    
## [120] "St. Lucia"                                                                
## [121] "Latin America & Caribbean"                                                
## [122] "Least developed countries: UN classification"                             
## [123] "Low income"                                                               
## [124] "Sri Lanka"                                                                
## [125] "Lower middle income"                                                      
## [126] "Low & middle income"                                                      
## [127] "Lesotho"                                                                  
## [128] "Late-demographic dividend"                                                
## [129] "Lithuania"                                                                
## [130] "Luxembourg"                                                               
## [131] "Latvia"                                                                   
## [132] "Macao SAR, China"                                                         
## [133] "Morocco"                                                                  
## [134] "Moldova"                                                                  
## [135] "Madagascar"                                                               
## [136] "Maldives"                                                                 
## [137] "Middle East, North Africa, Afghanistan & Pakistan"                        
## [138] "Mexico"                                                                   
## [139] "Middle income"                                                            
## [140] "North Macedonia"                                                          
## [141] "Mali"                                                                     
## [142] "Malta"                                                                    
## [143] "Myanmar"                                                                  
## [144] "Middle East, North Africa, Afghanistan & Pakistan (excluding high income)"
## [145] "Montenegro"                                                               
## [146] "Mongolia"                                                                 
## [147] "Mozambique"                                                               
## [148] "Mauritania"                                                               
## [149] "Mauritius"                                                                
## [150] "Malawi"                                                                   
## [151] "Malaysia"                                                                 
## [152] "North America"                                                            
## [153] "Namibia"                                                                  
## [154] "Niger"                                                                    
## [155] "Nigeria"                                                                  
## [156] "Nicaragua"                                                                
## [157] "Netherlands"                                                              
## [158] "Norway"                                                                   
## [159] "Nepal"                                                                    
## [160] "New Zealand"                                                              
## [161] "OECD members"                                                             
## [162] "Oman"                                                                     
## [163] "Other small states"                                                       
## [164] "Pakistan"                                                                 
## [165] "Panama"                                                                   
## [166] "Peru"                                                                     
## [167] "Philippines"                                                              
## [168] "Papua New Guinea"                                                         
## [169] "Poland"                                                                   
## [170] "Pre-demographic dividend"                                                 
## [171] "Portugal"                                                                 
## [172] "Paraguay"                                                                 
## [173] "West Bank and Gaza"                                                       
## [174] "Pacific island small states"                                              
## [175] "Post-demographic dividend"                                                
## [176] "Qatar"                                                                    
## [177] "Romania"                                                                  
## [178] "Russian Federation"                                                       
## [179] "Rwanda"                                                                   
## [180] "South Asia"                                                               
## [181] "Saudi Arabia"                                                             
## [182] "Sudan"                                                                    
## [183] "Senegal"                                                                  
## [184] "Singapore"                                                                
## [185] "Solomon Islands"                                                          
## [186] "Sierra Leone"                                                             
## [187] "El Salvador"                                                              
## [188] "Somalia, Fed. Rep."                                                       
## [189] "Serbia"                                                                   
## [190] "Sub-Saharan Africa (excluding high income)"                               
## [191] "South Sudan"                                                              
## [192] "Sub-Saharan Africa"                                                       
## [193] "Small states"                                                             
## [194] "Sao Tome and Principe"                                                    
## [195] "Suriname"                                                                 
## [196] "Slovak Republic"                                                          
## [197] "Slovenia"                                                                 
## [198] "Sweden"                                                                   
## [199] "Eswatini"                                                                 
## [200] "Seychelles"                                                               
## [201] "Syrian Arab Republic"                                                     
## [202] "Chad"                                                                     
## [203] "East Asia & Pacific (IDA & IBRD countries)"                               
## [204] "Europe & Central Asia (IDA & IBRD countries)"                             
## [205] "Togo"                                                                     
## [206] "Thailand"                                                                 
## [207] "Tajikistan"                                                               
## [208] "Latin America & the Caribbean (IDA & IBRD countries)"                     
## [209] "Timor-Leste"                                                              
## [210] "Middle East, North Africa, Afghanistan & Pakistan (IDA & IBRD)"           
## [211] "Tonga"                                                                    
## [212] "South Asia (IDA & IBRD)"                                                  
## [213] "Sub-Saharan Africa (IDA & IBRD countries)"                                
## [214] "Trinidad and Tobago"                                                      
## [215] "Tunisia"                                                                  
## [216] "Turkiye"                                                                  
## [217] "Tanzania"                                                                 
## [218] "Uganda"                                                                   
## [219] "Ukraine"                                                                  
## [220] "Upper middle income"                                                      
## [221] "Uruguay"                                                                  
## [222] "United States"                                                            
## [223] "Uzbekistan"                                                               
## [224] "St. Vincent and the Grenadines"                                           
## [225] "Venezuela, RB"                                                            
## [226] "Viet Nam"                                                                 
## [227] "Vanuatu"                                                                  
## [228] "World"                                                                    
## [229] "Samoa"                                                                    
## [230] "Kosovo"                                                                   
## [231] "Yemen, Rep."                                                              
## [232] "South Africa"                                                             
## [233] "Zambia"                                                                   
## [234] "Zimbabwe"

# remove the unneeded rows 
private_sector_credit <- private_sector_credit %>%
  mutate(Country = trimws(Country)) %>%
  filter(!grepl(paste0(
    "^Sub-Saharan Africa \\(excluding high income\\)$|",
    "^Sub-Saharan Africa \\(IDA & IBRD countries\\)$|", 
    "^Upper middle income$|",
    "^South Asia \\(IDA & IBRD\\)$|",
    "^Middle East, North Africa, Afghanistan & Pakistan \\(IDA & IBRD\\)$|", 
    "^Latin America & the Caribbean \\(IDA & IBRD countries\\)$|",
    "^Europe & Central Asia \\(IDA & IBRD countries\\)$|", 
    "^East Asia & Pacific \\(IDA & IBRD countries\\)$|",
    "^World$|",
    "^Post-demographic dividend$|",
    "^Small states$|",
    "^Pacific island small states$|",
    "^Pre-demographic dividend$|",
    "^Other small states$|",
    "^OECD members$|",
    "^Middle East, North Africa, Afghanistan & Pakistan \\(excluding high income\\)$|",
    "^Middle East, North Africa, Afghanistan & Pakistan$|",
    "^Middle income$|",
    "^Late-demographic dividend$|", 
    "^Low & middle income$|",
    "^Lower middle income$|",
    "^Low income$|",
    "^Least developed countries: UN classification$|", 
    "^Latin America & Caribbean$|",
    "^Latin America & Caribbean \\(excluding high income\\)$|",
    "^IDA only$|", 
    "^IDA blend$|",
    "^IDA total$|",
    "^IDA & IBRD total$|",
    "^IBRD only$|",
    "^Heavily indebted poor countries \\(HIPC\\)$|", 
    "^High income$|",
    "^Fragile and conflict affected situations$|",
    "^Europe & Central Asia$|",
    "^East Asia & Pacific$|", 
    "^Europe & Central Asia \\(excluding high income\\)$|",
    "^East Asia & Pacific \\(excluding high income\\)$|", 
    "^Caribbean small states$|",
    "^Central Europe and the Baltics$|",
    "^Arab World$|",
    "^Africa Western and Central$|", 
    "^Africa Eastern and Southern$|",
    "^Sub-Saharan Africa$|","^West Bank and Gaza$"
  ), Country, ignore.case = TRUE))


#remove deplicates

private_sector_credit <- private_sector_credit %>%
  distinct()

#see if theres any duplicates left

private_sector_credit_dup <- private_sector_credit %>%
  duplicated() %>%
  sum() 
print(paste("Number of duplicate rows:",private_sector_credit_dup))

## [1] "Number of duplicate rows: 0"

Clean Exchange rate

exchange_rate <- read_csv("exchange_rate.csv")

exchange_rate <- exchange_rate %>%
  dplyr::slice(-1, -2) %>% #rename the colomns 
  rename("Country" = "Data Source",
         "Country_Code" = "World Development Indicators",
         "Economic_Indicator" = "...3")

#split the indicator colomn into separate colomns

exchange_rate <- exchange_rate %>%
  separate_wider_delim(
    Economic_Indicator, 
    delim = ",",
    names = c("Economic_Indicator", "Indicator_Code", as.character(1960:2024)),
    too_few = 'align_start',
    too_many = "drop"
  )


#RESHAPE AND PIVOTE THE DATA TO LONGER

exchange_rate <- exchange_rate %>%
  pivot_longer( cols = "1960":"2024",
                names_to = "Years",
                values_to = "Values"
  ) %>% #values to numeric
  mutate(Values = as.numeric(Values)) %>% #remove nulls
  drop_na()


#check the countries 

unique(exchange_rate$Country)

##   [1] "Aruba"                          "Afghanistan"                   
##   [3] "Angola"                         "Albania"                       
##   [5] "Andorra"                        "United Arab Emirates"          
##   [7] "Argentina"                      "Armenia"                       
##   [9] "American Samoa"                 "Antigua and Barbuda"           
##  [11] "Australia"                      "Austria"                       
##  [13] "Azerbaijan"                     "Burundi"                       
##  [15] "Belgium"                        "Benin"                         
##  [17] "Burkina Faso"                   "Bangladesh"                    
##  [19] "Bulgaria"                       "Bahrain"                       
##  [21] "Bahamas, The"                   "Bosnia and Herzegovina"        
##  [23] "Belarus"                        "Belize"                        
##  [25] "Bermuda"                        "Bolivia"                       
##  [27] "Brazil"                         "Barbados"                      
##  [29] "Brunei Darussalam"              "Bhutan"                        
##  [31] "Botswana"                       "Central African Republic"      
##  [33] "Canada"                         "Switzerland"                   
##  [35] "Channel Islands"                "Chile"                         
##  [37] "China"                          "Cote d'Ivoire"                 
##  [39] "Cameroon"                       "Congo, Dem. Rep."              
##  [41] "Congo, Rep."                    "Colombia"                      
##  [43] "Comoros"                        "Cabo Verde"                    
##  [45] "Costa Rica"                     "Cuba"                          
##  [47] "Curacao"                        "Cayman Islands"                
##  [49] "Cyprus"                         "Czechia"                       
##  [51] "Germany"                        "Djibouti"                      
##  [53] "Dominica"                       "Denmark"                       
##  [55] "Dominican Republic"             "Algeria"                       
##  [57] "Ecuador"                        "Egypt, Arab Rep."              
##  [59] "Euro area"                      "Eritrea"                       
##  [61] "Spain"                          "Estonia"                       
##  [63] "Ethiopia"                       "Finland"                       
##  [65] "Fiji"                           "France"                        
##  [67] "Faroe Islands"                  "Micronesia, Fed. Sts."         
##  [69] "Gabon"                          "United Kingdom"                
##  [71] "Georgia"                        "Ghana"                         
##  [73] "Gibraltar"                      "Guinea"                        
##  [75] "Gambia, The"                    "Guinea-Bissau"                 
##  [77] "Equatorial Guinea"              "Greece"                        
##  [79] "Grenada"                        "Greenland"                     
##  [81] "Guatemala"                      "Guam"                          
##  [83] "Guyana"                         "Hong Kong SAR, China"          
##  [85] "Honduras"                       "Croatia"                       
##  [87] "Haiti"                          "Hungary"                       
##  [89] "Indonesia"                      "Isle of Man"                   
##  [91] "India"                          "Ireland"                       
##  [93] "Iran, Islamic Rep."             "Iraq"                          
##  [95] "Iceland"                        "Israel"                        
##  [97] "Italy"                          "Jamaica"                       
##  [99] "Jordan"                         "Japan"                         
## [101] "Kazakhstan"                     "Kenya"                         
## [103] "Kyrgyz Republic"                "Cambodia"                      
## [105] "Kiribati"                       "St. Kitts and Nevis"           
## [107] "Korea, Rep."                    "Kuwait"                        
## [109] "Lao PDR"                        "Lebanon"                       
## [111] "Liberia"                        "Libya"                         
## [113] "St. Lucia"                      "Sri Lanka"                     
## [115] "Lesotho"                        "Lithuania"                     
## [117] "Luxembourg"                     "Latvia"                        
## [119] "Macao SAR, China"               "St. Martin (French part)"      
## [121] "Morocco"                        "Monaco"                        
## [123] "Moldova"                        "Madagascar"                    
## [125] "Maldives"                       "Mexico"                        
## [127] "Marshall Islands"               "North Macedonia"               
## [129] "Mali"                           "Malta"                         
## [131] "Myanmar"                        "Montenegro"                    
## [133] "Mongolia"                       "Northern Mariana Islands"      
## [135] "Mozambique"                     "Mauritania"                    
## [137] "Mauritius"                      "Malawi"                        
## [139] "Malaysia"                       "Namibia"                       
## [141] "New Caledonia"                  "Niger"                         
## [143] "Nigeria"                        "Nicaragua"                     
## [145] "Netherlands"                    "Norway"                        
## [147] "Nepal"                          "Nauru"                         
## [149] "New Zealand"                    "Oman"                          
## [151] "Pakistan"                       "Panama"                        
## [153] "Peru"                           "Philippines"                   
## [155] "Palau"                          "Papua New Guinea"              
## [157] "Poland"                         "Puerto Rico (US)"              
## [159] "Portugal"                       "Paraguay"                      
## [161] "French Polynesia"               "Qatar"                         
## [163] "Romania"                        "Russian Federation"            
## [165] "Rwanda"                         "Saudi Arabia"                  
## [167] "Sudan"                          "Senegal"                       
## [169] "Singapore"                      "Solomon Islands"               
## [171] "Sierra Leone"                   "El Salvador"                   
## [173] "San Marino"                     "Somalia, Fed. Rep."            
## [175] "Serbia"                         "South Sudan"                   
## [177] "Sao Tome and Principe"          "Suriname"                      
## [179] "Slovak Republic"                "Slovenia"                      
## [181] "Sweden"                         "Eswatini"                      
## [183] "Sint Maarten (Dutch part)"      "Seychelles"                    
## [185] "Syrian Arab Republic"           "Turks and Caicos Islands"      
## [187] "Chad"                           "Togo"                          
## [189] "Thailand"                       "Tajikistan"                    
## [191] "Turkmenistan"                   "Timor-Leste"                   
## [193] "Tonga"                          "Trinidad and Tobago"           
## [195] "Tunisia"                        "Turkiye"                       
## [197] "Tanzania"                       "Uganda"                        
## [199] "Ukraine"                        "Uruguay"                       
## [201] "United States"                  "Uzbekistan"                    
## [203] "St. Vincent and the Grenadines" "Venezuela, RB"                 
## [205] "Virgin Islands (U.S.)"          "Viet Nam"                      
## [207] "Vanuatu"                        "Samoa"                         
## [209] "Kosovo"                         "Yemen, Rep."                   
## [211] "South Africa"                   "Zambia"                        
## [213] "Zimbabwe"

# remove the unneeded rows 
exchange_rate <- exchange_rate %>%
  mutate(Country = trimws(Country)) %>%
  filter(!grepl(paste0(
    "^Sub-Saharan Africa \\(excluding high income\\)$|",
    "^Sub-Saharan Africa \\(IDA & IBRD countries\\)$|", 
    "^Upper middle income$|",
    "^South Asia \\(IDA & IBRD\\)$|",
    "^Middle East, North Africa, Afghanistan & Pakistan \\(IDA & IBRD\\)$|", 
    "^Latin America & the Caribbean \\(IDA & IBRD countries\\)$|",
    "^Europe & Central Asia \\(IDA & IBRD countries\\)$|", 
    "^East Asia & Pacific \\(IDA & IBRD countries\\)$|",
    "^World$|",
    "^Post-demographic dividend$|",
    "^Small states$|",
    "^Pacific island small states$|",
    "^Pre-demographic dividend$|",
    "^Other small states$|",
    "^OECD members$|",
    "^Middle East, North Africa, Afghanistan & Pakistan \\(excluding high income\\)$|",
    "^Middle East, North Africa, Afghanistan & Pakistan$|",
    "^Middle income$|",
    "^Late-demographic dividend$|", 
    "^Low & middle income$|",
    "^Lower middle income$|",
    "^Low income$|",
    "^Least developed countries: UN classification$|", 
    "^Latin America & Caribbean$|",
    "^Latin America & Caribbean \\(excluding high income\\)$|",
    "^IDA only$|", 
    "^IDA blend$|",
    "^IDA total$|",
    "^IDA & IBRD total$|",
    "^IBRD only$|",
    "^Heavily indebted poor countries \\(HIPC\\)$|", 
    "^High income$|",
    "^Fragile and conflict affected situations$|",
    "^Europe & Central Asia$|",
    "^East Asia & Pacific$|", 
    "^Europe & Central Asia \\(excluding high income\\)$|",
    "^East Asia & Pacific \\(excluding high income\\)$|", 
    "^Caribbean small states$|",
    "^Central Europe and the Baltics$|",
    "^Arab World$|",
    "^Africa Western and Central$|", 
    "^Africa Eastern and Southern$|",
    "^Sub-Saharan Africa$|","^West Bank and Gaza$"
  ), Country, ignore.case = TRUE))


#remove deplicates

exchange_rate <- exchange_rate %>%
  distinct()

#see if theres any duplicates left

exchange_rate_dup <- exchange_rate %>%
  duplicated() %>%
  sum() 
print(paste("Number of duplicate rows:",exchange_rate_dup))

## [1] "Number of duplicate rows: 0"

Clean current account balance

current_account_balance <- read_csv("current_account_balance.csv")


current_account_balance <- current_account_balance %>%
  dplyr::slice(-1, -2) %>% #rename the colomns 
  rename("Country" = "Data Source",
         "Country_Code" = "World Development Indicators",
         "Economic_Indicator" = "...3")

#split the indicator colomn into separate colomns

current_account_balance <- current_account_balance %>%
  separate_wider_delim(
    Economic_Indicator, 
    delim = ",",
    names = c("Economic_Indicator", "Indicator_Code", as.character(1960:2024)),
    too_few = 'align_start',
    too_many = "drop"
  )


#RESHAPE AND PIVOTE THE DATA TO LONGER

current_account_balance <- current_account_balance %>%
  pivot_longer( cols = "1960":"2024",
                names_to = "Years",
                values_to = "Values"
  ) %>% #values to numeric
  mutate(Values = as.numeric(Values)) %>% #remove nulls
  drop_na()


#check the countries 

unique(current_account_balance$Country)

##   [1] "Aruba"                          "Afghanistan"                   
##   [3] "Angola"                         "Albania"                       
##   [5] "Andorra"                        "Argentina"                     
##   [7] "Armenia"                        "Antigua and Barbuda"           
##   [9] "Australia"                      "Austria"                       
##  [11] "Azerbaijan"                     "Burundi"                       
##  [13] "Belgium"                        "Benin"                         
##  [15] "Burkina Faso"                   "Bangladesh"                    
##  [17] "Bulgaria"                       "Bahrain"                       
##  [19] "Bahamas, The"                   "Bosnia and Herzegovina"        
##  [21] "Belarus"                        "Belize"                        
##  [23] "Bermuda"                        "Bolivia"                       
##  [25] "Brazil"                         "Barbados"                      
##  [27] "Brunei Darussalam"              "Bhutan"                        
##  [29] "Botswana"                       "Central African Republic"      
##  [31] "Canada"                         "Switzerland"                   
##  [33] "Chile"                          "China"                         
##  [35] "Cote d'Ivoire"                  "Cameroon"                      
##  [37] "Congo, Dem. Rep."               "Congo, Rep."                   
##  [39] "Colombia"                       "Comoros"                       
##  [41] "Cabo Verde"                     "Costa Rica"                    
##  [43] "Curacao"                        "Cayman Islands"                
##  [45] "Cyprus"                         "Czechia"                       
##  [47] "Germany"                        "Djibouti"                      
##  [49] "Dominica"                       "Denmark"                       
##  [51] "Dominican Republic"             "Algeria"                       
##  [53] "Ecuador"                        "Egypt, Arab Rep."              
##  [55] "Euro area"                      "Eritrea"                       
##  [57] "Spain"                          "Estonia"                       
##  [59] "Ethiopia"                       "Finland"                       
##  [61] "Fiji"                           "France"                        
##  [63] "Faroe Islands"                  "Micronesia, Fed. Sts."         
##  [65] "Gabon"                          "United Kingdom"                
##  [67] "Georgia"                        "Ghana"                         
##  [69] "Guinea"                         "Gambia, The"                   
##  [71] "Guinea-Bissau"                  "Equatorial Guinea"             
##  [73] "Greece"                         "Grenada"                       
##  [75] "Guatemala"                      "Guyana"                        
##  [77] "Hong Kong SAR, China"           "Honduras"                      
##  [79] "Croatia"                        "Haiti"                         
##  [81] "Hungary"                        "Indonesia"                     
##  [83] "India"                          "Ireland"                       
##  [85] "Iran, Islamic Rep."             "Iraq"                          
##  [87] "Iceland"                        "Israel"                        
##  [89] "Italy"                          "Jamaica"                       
##  [91] "Jordan"                         "Japan"                         
##  [93] "Kazakhstan"                     "Kenya"                         
##  [95] "Kyrgyz Republic"                "Cambodia"                      
##  [97] "Kiribati"                       "St. Kitts and Nevis"           
##  [99] "Korea, Rep."                    "Kuwait"                        
## [101] "Lao PDR"                        "Lebanon"                       
## [103] "Liberia"                        "Libya"                         
## [105] "St. Lucia"                      "Sri Lanka"                     
## [107] "Lesotho"                        "Lithuania"                     
## [109] "Luxembourg"                     "Latvia"                        
## [111] "Macao SAR, China"               "Morocco"                       
## [113] "Moldova"                        "Madagascar"                    
## [115] "Maldives"                       "Mexico"                        
## [117] "Marshall Islands"               "North Macedonia"               
## [119] "Mali"                           "Malta"                         
## [121] "Myanmar"                        "Montenegro"                    
## [123] "Mongolia"                       "Mozambique"                    
## [125] "Mauritania"                     "Mauritius"                     
## [127] "Malawi"                         "Malaysia"                      
## [129] "Namibia"                        "New Caledonia"                 
## [131] "Niger"                          "Nigeria"                       
## [133] "Nicaragua"                      "Netherlands"                   
## [135] "Norway"                         "Nepal"                         
## [137] "Nauru"                          "New Zealand"                   
## [139] "Oman"                           "Pakistan"                      
## [141] "Panama"                         "Peru"                          
## [143] "Philippines"                    "Palau"                         
## [145] "Papua New Guinea"               "Poland"                        
## [147] "Portugal"                       "Paraguay"                      
## [149] "West Bank and Gaza"             "French Polynesia"              
## [151] "Qatar"                          "Romania"                       
## [153] "Russian Federation"             "Rwanda"                        
## [155] "Saudi Arabia"                   "Sudan"                         
## [157] "Senegal"                        "Singapore"                     
## [159] "Solomon Islands"                "Sierra Leone"                  
## [161] "El Salvador"                    "San Marino"                    
## [163] "Serbia"                         "South Sudan"                   
## [165] "Sao Tome and Principe"          "Suriname"                      
## [167] "Slovak Republic"                "Slovenia"                      
## [169] "Sweden"                         "Eswatini"                      
## [171] "Sint Maarten (Dutch part)"      "Seychelles"                    
## [173] "Syrian Arab Republic"           "Turks and Caicos Islands"      
## [175] "Chad"                           "Togo"                          
## [177] "Thailand"                       "Tajikistan"                    
## [179] "Timor-Leste"                    "Tonga"                         
## [181] "Trinidad and Tobago"            "Tunisia"                       
## [183] "Turkiye"                        "Tuvalu"                        
## [185] "Tanzania"                       "Uganda"                        
## [187] "Ukraine"                        "Uruguay"                       
## [189] "United States"                  "Uzbekistan"                    
## [191] "St. Vincent and the Grenadines" "Venezuela, RB"                 
## [193] "Viet Nam"                       "Vanuatu"                       
## [195] "Samoa"                          "Kosovo"                        
## [197] "Yemen, Rep."                    "South Africa"                  
## [199] "Zambia"                         "Zimbabwe"

# remove the unneeded rows 
current_account_balance <- current_account_balance %>%
  mutate(Country = trimws(Country)) %>%
  filter(!grepl(paste0(
    "^Sub-Saharan Africa \\(excluding high income\\)$|",
    "^Sub-Saharan Africa \\(IDA & IBRD countries\\)$|", 
    "^Upper middle income$|",
    "^South Asia \\(IDA & IBRD\\)$|",
    "^Middle East, North Africa, Afghanistan & Pakistan \\(IDA & IBRD\\)$|", 
    "^Latin America & the Caribbean \\(IDA & IBRD countries\\)$|",
    "^Europe & Central Asia \\(IDA & IBRD countries\\)$|", 
    "^East Asia & Pacific \\(IDA & IBRD countries\\)$|",
    "^World$|",
    "^Post-demographic dividend$|",
    "^Small states$|",
    "^Pacific island small states$|",
    "^Pre-demographic dividend$|",
    "^Other small states$|",
    "^OECD members$|",
    "^Middle East, North Africa, Afghanistan & Pakistan \\(excluding high income\\)$|",
    "^Middle East, North Africa, Afghanistan & Pakistan$|",
    "^Middle income$|",
    "^Late-demographic dividend$|", 
    "^Low & middle income$|",
    "^Lower middle income$|",
    "^Low income$|",
    "^Least developed countries: UN classification$|", 
    "^Latin America & Caribbean$|",
    "^Latin America & Caribbean \\(excluding high income\\)$|",
    "^IDA only$|", 
    "^IDA blend$|",
    "^IDA total$|",
    "^IDA & IBRD total$|",
    "^IBRD only$|",
    "^Heavily indebted poor countries \\(HIPC\\)$|", 
    "^High income$|",
    "^Fragile and conflict affected situations$|",
    "^Europe & Central Asia$|",
    "^East Asia & Pacific$|", 
    "^Europe & Central Asia \\(excluding high income\\)$|",
    "^East Asia & Pacific \\(excluding high income\\)$|", 
    "^Caribbean small states$|",
    "^Central Europe and the Baltics$|",
    "^Arab World$|",
    "^Africa Western and Central$|", 
    "^Africa Eastern and Southern$|",
    "^Sub-Saharan Africa$|","^West Bank and Gaza$"
  ), Country, ignore.case = TRUE))


#remove deplicates

current_account_balance <- current_account_balance %>%
  distinct()

#see if theres any duplicates left

current_account_balance_dup <- current_account_balance %>%
  duplicated() %>%
  sum() 
print(paste("Number of duplicate rows:",current_account_balance_dup))

## [1] "Number of duplicate rows: 0"

Clean unemplyment

unemployment <- read_csv("unemployment.csv")

## Rows: 439 Columns: 54
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (54): Country Name, Country Code, Series Name, Series Code, 1975 [YR1975...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#rename the colomns 

unemployment <- unemployment %>%
  rename("Country" = "Country Name",
         "Economic_Indicator" = "Series Name",
         "Country_Code" = "Country Code",
         "Indicator_Code" = "Series Code")



#clean colomns name and pivot to longer



unemployment <- unemployment %>%
  rename_with(~ str_remove(., "\\s*\\[YR\\d+\\]"), matches("\\d{4}")) %>%
  pivot_longer(cols = matches("^\\d{4}$"), # Match exactly 4 digits at start and end
              names_to = "Years",
              values_to = "Values") %>%
  dplyr::select(Country, Country_Code, Economic_Indicator, Indicator_Code, Years, Values) %>%
  mutate(Values = as.numeric(Values)) %>% #remove null values
  drop_na()

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Values = as.numeric(Values)`.
## Caused by warning:
## ! NAs introduced by coercion

#remove deplicates

unemployment <- unemployment %>%
  distinct()

#see if theres any duplicates left

unemployment_dup <- unemployment %>%
  duplicated() %>%
  sum() 
print(paste("Number of duplicate rows:",unemployment_dup))

## [1] "Number of duplicate rows: 0"

Clean the following dataset before merging

#lending rate

lending_rate <- read_csv("lending_rate.csv")

## Rows: 4709 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Country_Code, Indicator_Name, Indicator_Code
## dbl (3): ...1, Years, Values
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

lending_rate <- lending_rate %>% 
  dplyr::mutate(Years = as.character(Years))


#deposit rate 

deposit_rate <- read_csv("deposit_rate.csv")

## Rows: 4888 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Country_Code, Indicator_Name, Indicator_Code
## dbl (2): Years, Values
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

deposit_rate <- deposit_rate %>% 
  dplyr::mutate(Years = as.character(Years))


#interest rate spread

interest_rate_spread <- read_csv("interest_rate_spread.csv")

## Rows: 4988 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Country_Code, Indicator_Name, Indicator_Code
## dbl (3): ...1, Years, Values
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

interest_rate_spread <- interest_rate_spread %>%
  dplyr::mutate(Years = as.character(Years))


#interest rate

real_interest_rate <- read_csv("real_interest_rate.csv")

## Rows: 4681 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Country_Code, Indicator_Name, Indicator_Code
## dbl (3): ...1, Years, Values
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

real_interest_rate <- real_interest_rate %>%
  dplyr::mutate(Years = as.character(Years))



#risk premium

risk_premium <- read_csv("risk_premium.csv")

## Rows: 2413 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Country_Code, Indicator_Name, Indicator_Code
## dbl (3): ...1, Years, Values
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

risk_premium <- risk_premium %>% 
  dplyr::mutate(Years = as.character(Years))


#bank on none performance loan

bank_on_none_perfomance_loan <- read_csv("bank_on_none_perfomance_loan.csv")

## Rows: 2105 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Country_Code, Indicator_Name, Indicator_Code
## dbl (3): ...1, Years, Values
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

bank_on_none_perfomance_loan <- bank_on_none_perfomance_loan %>%
  dplyr::mutate(Years = as.character(Years))

Merge the data sets together

Global_Rates <- gdp_current_us %>%
  dplyr::select(Country, Country_Code, Years, GDP_current_us = Values) %>%
  
  left_join(
    gdp_per_capita %>% 
      dplyr::select(Country, Country_Code, Years, GDP_per_capita_growth = Values),
    by = c("Country", "Country_Code", "Years")
  ) %>%
  
  left_join(
    inflation_consumer_price %>% 
      dplyr::select(Country, Country_Code, Years, Inflation = Values),
    by = c("Country", "Country_Code", "Years")
  ) %>%
  
  left_join(
    lending_rate %>% 
      dplyr::select(Country, Country_Code, Years, Lending_rate = Values),
    by = c("Country", "Country_Code", "Years")
  ) %>%
  
  left_join(
    deposit_rate %>% 
      dplyr::select(Country, Country_Code, Years, Deposit_rate = Values),
    by = c("Country", "Country_Code", "Years")
  ) %>%
  
  left_join(
    current_account_balance %>% 
      dplyr::select(Country, Country_Code, Years, Current_account_balance = Values),
    by = c("Country", "Country_Code", "Years")
  ) %>%
  
  left_join(
    exchange_rate %>% 
      dplyr::select(Country, Country_Code, Years, Exchange_rate = Values),
    by = c("Country", "Country_Code", "Years")
  ) %>%
  
  left_join(
    bank_on_none_perfomance_loan %>% 
      dplyr::select(Country, Country_Code, Years, Non_performing_loans = Values),
    by = c("Country", "Country_Code", "Years")
  ) %>%
  
  left_join(
    private_sector_credit %>% 
      dplyr::select(Country, Country_Code, Years, Private_sector_credit = Values),
    by = c("Country", "Country_Code", "Years")
  ) %>%
  
  left_join(
    unemployment %>% 
      dplyr::select(Country, Country_Code, Years, Unemployment = Values),
    by = c("Country", "Country_Code", "Years")
  ) %>%
  
  left_join(
    interest_rate_spread %>% 
      dplyr::select(Country, Country_Code, Years, Interest_rate_spread = Values),
    by = c("Country", "Country_Code", "Years")
  ) %>%
  
  left_join(
    real_interest_rate %>% 
      dplyr::select(Country, Country_Code, Years, Real_interest_rate = Values),
    by = c("Country", "Country_Code", "Years")
  ) %>%
  
  left_join(
    risk_premium %>% 
      dplyr::select(Country, Country_Code, Years, Risk_premium = Values),
    by = c("Country", "Country_Code", "Years")
  )

Clean the dataset and save

glimpse(Global_Rates)

## Rows: 15,329
## Columns: 16
## $ Country                 <chr> "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "…
## $ Country_Code            <chr> "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW…
## $ Years                   <chr> "1986", "1987", "1988", "1989", "1990", "1991"…
## $ GDP_current_us          <dbl> 405586592, 487709497, 596648045, 695530726, 76…
## $ GDP_per_capita_growth   <dbl> NA, 17.59320595, 18.30468737, 10.06693198, 0.1…
## $ Inflation               <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ Lending_rate            <dbl> 11.00000, 10.30000, 10.24167, 10.47500, 10.600…
## $ Deposit_rate            <dbl> 6.400000, 6.583333, 6.700000, 6.700000, 6.7000…
## $ Current_account_balance <dbl> NA, -18659217.9, -22569832.4, -44301676.0, -46…
## $ Exchange_rate           <dbl> NA, 1.79, 1.79, 1.79, 1.79, 1.79, 1.79, 1.79, …
## $ Non_performing_loans    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ Private_sector_credit   <dbl> 39.46433, 39.44192, 43.77566, 43.35639, 44.488…
## $ Unemployment            <dbl> NA, NA, NA, NA, NA, 5.92, NA, NA, 5.36, NA, NA…
## $ Interest_rate_spread    <dbl> NA, 4.600000, 3.716667, 3.541667, 3.775000, 3.…
## $ Real_interest_rate      <dbl> NA, 6.475434, 6.918180, 6.264234, 4.566641, 4.…
## $ Risk_premium            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…

#remove the null values
Global_Rates <- Global_Rates %>%
  na.omit()



#remove all duplicates

Global_Rates <- Global_Rates %>%
  distinct()




#save the Global_rate dataset

write.csv(Global_Rates, "Global_Rates.csv", row.names = FALSE)

Data Cleaning Process for Macroeconomic Risk Using Global Indicators

Nsovo Ntuli

2026-01-15