Urban Infra Cleaning

#adding and pivoting data before merging all country level in formation in one data frame

gain <- read_csv("00_data_raw/Readiness Vulnerability Index/gain/gain.csv")%>%
  pivot_longer(cols = '1995':'2020', 
        names_to = "year", 
        values_to = "gain")

readiness <- read_csv("00_data_raw/Readiness Vulnerability Index/readiness/readiness.csv")%>%
  pivot_longer(cols = '1995':'2020', 
        names_to = "year", 
        values_to = "readiness")

vulnerability <- read_csv("00_data_raw/Readiness Vulnerability Index/vulnerability/vulnerability.csv")%>%
  pivot_longer(cols = '1995':'2020', 
        names_to = "year", 
        values_to = "vulnerablity")
  
infrastructure <- read_csv("00_data_raw/Readiness Vulnerability Index/vulnerability/infrastructure.csv")%>%
  pivot_longer(cols = '1995':'2020', 
        names_to = "year", 
        values_to = "infrastructure")

slum_percentage_urban <- read_csv("00_data_raw/WB_DATA_Slum_Percentage/Slum_Percentage_Urban.csv")%>%
dplyr::select(-"Country Name", -"Series Name", -"Series Code")

names(slum_percentage_urban) <- NULL
names(slum_percentage_urban) <- c("ISO3","1960", "1961", "1962", "1963", "1964" ,"1965" ,"1966", "1967" ,"1968", "1969", "1970", "1971", "1972" ,"1973", "1974" ,"1975", "1976", "1977","1978", "1979", "1980", "1981", "1982", "1983", "1984", "1985", "1986", "1987", "1988" ,"1989", "1990" ,"1991" ,"1992" ,"1993", "1994","1995","1996", "1997", "1998" ,"1999","2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008" ,"2009" ,"2010", "2011","2012" ,"2013","2014" ,"2015" ,"2016" ,"2017", "2018", "2019", "2020", "2021")

slum_percentage_urban <- slum_percentage_urban%>%
pivot_longer(cols = '1960':'2021', 
        names_to = "year", 
        values_to = "slum_percent")

urban_population_growth <- read_csv("00_data_raw/urban_population_growth.csv")%>%
dplyr::select(-"Country Name", -"Series Name", -"Series Code")

names(urban_population_growth) <- NULL
names(urban_population_growth) <- c("ISO3","1960", "1961", "1962", "1963", "1964" ,"1965" ,"1966", "1967" ,"1968", "1969", "1970", "1971", "1972" ,"1973", "1974" ,"1975", "1976", "1977","1978", "1979", "1980", "1981", "1982", "1983", "1984", "1985", "1986", "1987", "1988" ,"1989", "1990" ,"1991" ,"1992" ,"1993", "1994","1995","1996", "1997", "1998" ,"1999","2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008" ,"2009" ,"2010", "2011","2012" ,"2013","2014" ,"2015" ,"2016" ,"2017", "2018", "2019", "2020", "2021")

urban_population_growth <- urban_population_growth%>%
pivot_longer(cols = '1960':'2021', 
        names_to = "year", 
        values_to = "pop_growth")

urban_population <- read_csv("00_data_raw/urban_population.csv")%>%
dplyr::select(-"Country Name", -"Series Name", -"Series Code")

names(urban_population) <- NULL
names(urban_population) <- c("ISO3","1960", "1961", "1962", "1963", "1964" ,"1965" ,"1966", "1967" ,"1968", "1969", "1970", "1971", "1972" ,"1973", "1974" ,"1975", "1976", "1977","1978", "1979", "1980", "1981", "1982", "1983", "1984", "1985", "1986", "1987", "1988" ,"1989", "1990" ,"1991" ,"1992" ,"1993", "1994","1995","1996", "1997", "1998" ,"1999","2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008" ,"2009" ,"2010", "2011","2012" ,"2013","2014" ,"2015" ,"2016" ,"2017", "2018", "2019", "2020", "2021")

urban_population <- urban_population%>%
pivot_longer(cols = '1960':'2021', 
        names_to = "year", 
        values_to = "pop")

urban_water_usage <- read_csv("00_data_raw/urban_water_usage.csv")%>%
dplyr::select(-"Country Name", -"Series Name", -"Series Code")

names(urban_water_usage) <- NULL
names(urban_water_usage) <- c("ISO3","1960", "1961", "1962", "1963", "1964" ,"1965" ,"1966", "1967" ,"1968", "1969", "1970", "1971", "1972" ,"1973", "1974" ,"1975", "1976", "1977","1978", "1979", "1980", "1981", "1982", "1983", "1984", "1985", "1986", "1987", "1988" ,"1989", "1990" ,"1991" ,"1992" ,"1993", "1994","1995","1996", "1997", "1998" ,"1999","2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008" ,"2009" ,"2010", "2011","2012" ,"2013","2014" ,"2015" ,"2016" ,"2017", "2018", "2019", "2020", "2021")

urban_water_usage<- urban_water_usage%>%
pivot_longer(cols = '1960':'2021', 
        names_to = "year", 
        values_to = "water_usage")

electricity_access <- read_csv("00_data_raw/electricity_access.csv")%>%
dplyr:: select(-"Country Name", -"Series Name", -"Series Code")

names(electricity_access) <- NULL
names(electricity_access) <- c("ISO3","1960", "1961", "1962", "1963", "1964" ,"1965" ,"1966", "1967" ,"1968", "1969", "1970", "1971", "1972" ,"1973", "1974" ,"1975", "1976", "1977","1978", "1979", "1980", "1981", "1982", "1983", "1984", "1985", "1986", "1987", "1988" ,"1989", "1990" ,"1991" ,"1992" ,"1993", "1994","1995","1996", "1997", "1998" ,"1999","2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008" ,"2009" ,"2010", "2011","2012" ,"2013","2014" ,"2015" ,"2016" ,"2017", "2018", "2019", "2020", "2021")

electricity_access <- electricity_access %>%
pivot_longer(cols = '1960':'2021', 
        names_to = "year", 
        values_to = "electricty_acc")

region <- read_excel("~/Documents/R_Docs/Practice/Data_cleaning/POP_TOTAL_WB.xlsx")%>%
  dplyr::select(-"SpecialNotes", -"TableName")%>%
    rename( 'ISO3' = 'Country Code')

country_climate_fin <- read_excel("00_data_raw/ODL-Export-countries-1680648565747.xlsx")%>%
  dplyr::select(-'SIDS', -'LDCs',-'Region')%>%
  rename( Name = 'Country Name')
  

EIU <- read_excel("00_data_raw/EIU.xlsx")%>%
  dplyr::select(code, "1996", "1998" ,"2000","2002", "2003", "2004", "2005", "2006", "2007", "2008" ,"2009" ,"2010", "2011","2012" ,"2013","2014" ,"2015" ,"2016" ,"2017", "2018", "2019", "2020", "2021")%>%
  pivot_longer(cols = '1996':'2021', 
        names_to = "year", 
        values_to = "demo_index")%>%
  rename(ISO3 = "code")
#merging individual df to country data
country_data<- merge(gain, electricity_access, by = c('ISO3','year'))
country_data<- merge(country_data, region, by = 'ISO3')
country_data<- merge(country_data, urban_water_usage, by = c('ISO3','year'))
country_data<- merge(country_data, urban_population, by = c('ISO3','year'))
country_data<- merge(country_data, urban_population_growth, by = c('ISO3','year'))
country_data<- merge(country_data, infrastructure, by = c('ISO3','year', 'Name'))
country_data<- merge(country_data, readiness, by = c('ISO3','year', 'Name'))
country_data<- merge(country_data, vulnerability, by = c('ISO3','year', 'Name'))
country_data<- merge(country_data, slum_percentage_urban, by = c('ISO3','year'))
country_data<- merge(country_data, country_climate_fin, by = c('ISO3','Name'))
country_data <- country_data %>%
  filter (year > 2001)
EIU <- EIU %>%
  filter (year > 2001)
country_data<- merge(country_data, EIU, by = c('ISO3', 'year'))

#adding categories to indicate 1 to 3 levels of priority based on index values
country_data <- country_data %>%
  filter(pop_growth>0)%>%
   mutate(slum_prio = case_when(slum_percent < 30 ~ 1, slum_percent < 49 ~ 2 ,slum_percent > 49 ~ 3), 
          prio_vul = case_when(vulnerablity <.37 ~ 1, vulnerablity < .49 ~2, vulnerablity > .50 ~ 3), 
          prio_readiness = case_when(readiness< .32 ~ 3, readiness < .51 ~ 2 , readiness > .52 ~ 1), 
          EIU_prio = case_when(demo_index < .30 ~ 3, demo_index <.49 ~ 2, demo_index < .50 ~ 1), 
          prio_grow_pop = case_when(pop_growth < 2 ~ 1, pop_growth < 6 ~ 2, pop_growth < 7 ~ 3))%>%
  dplyr::select(ISO3, Name, year, Region, IncomeGroup, slum_percent, slum_prio, vulnerablity, prio_vul, readiness, prio_readiness, demo_index, EIU_prio, pop_growth, pop, prio_grow_pop)
write_csv(country_data,"/Users/sebastiengettelman/Documents/R_Docs/Final Paper/01_data_cleaning/country_data.csv")


kable(head(country_data))
ISO3 Name year Region IncomeGroup slum_percent slum_prio vulnerablity prio_vul readiness prio_readiness demo_index EIU_prio pop_growth pop prio_grow_pop
AFG Afghanistan 2002 South Asia Low income .. 1 0.6072268 3 0.3229413 2 NA NA 6.86345297807244 4674867 3
AFG Afghanistan 2003 South Asia Low income .. 1 0.5944043 3 0.3270357 2 NA NA 7.95344768841497 5061866 NA
AFG Afghanistan 2004 South Asia Low income .. 1 0.5964283 3 0.3223744 2 0.0968750 3 4.58865325754718 5299549 2
AFG Afghanistan 2005 South Asia Low income .. 1 0.6032747 3 0.2697370 3 0.0802083 3 4.47468958963317 5542073 2
AFG Afghanistan 2006 South Asia Low income 63.6 3 0.6062524 3 0.2362718 3 0.0692500 3 5.03421600301046 5828215 2
AFG Afghanistan 2007 South Asia Low income .. 1 0.6017442 3 0.2305408 3 0.3060000 2 2.68846842633562 5987030 2
#uploading and renaming variables for infrastructure project data, these data will still need to be cleaned and explored. 
infra_projects <- read_excel("00_data_raw/Infra-Projects.xlsx")%>%
 rename(Name = "Country")
write_csv(infra_projects,"/Users/sebastiengettelman/Documents/R_Docs/Final Paper/01_data_cleaning/infra-projects.csv")

kable(head(infra_projects))
Region Name IncomeGroup IDA Status Financial closure year Financial closure Month Project name RelatedNames Type of PPI Subtype of PPI Project status Primary sector Subsector Segment Location ContractPeriod GovtGrantingContract DirectGovtSupport DirectGovtSupportValue InDirectGovtSupport InDirectGovtSupportValue Total Equity InvestmentYear PercentPrivate FeesToGovernment PhysicalAssets TotalInvestment CapacityType Capacity Technology RelatedProjects BidCriteria AwardMethod NumberOfBids Sponsors Sponsors Country Main Revenue Source Other Revenue Source MultiLateralSupport BiLateralSupport TotalDebtFunding DebtEquityGrantRatio ProjectBanks UnsolicitedProposal PublicDisclosure
East Asia and Pacific Cambodia Low income IDA 1999 April Jupiter Power Battambang NA Greenfield project Build, operate, and transfer Active Energy Electricity Electricity generation Battambang 15 Not Available Not Available NA Not Available NA NA 1999 100 0 10 10 MW 15 Diesel NA NA Not Available NA Maxim Power Corporation (Not Available / 51% ),
_x000d_Caterpillar Inc. (Not Available / 49% ) Canada,
_x000d_United States Not Applicable N/A No No NA NA OPIC (Not Available / Not Available / $Not Available) Yes No
East Asia and Pacific Cambodia Low income IDA 1999 April Jupiter Power Battambang NA Greenfield project Build, operate, and transfer Active Energy Electricity Electricity generation Battambang 15 Not Available Not Available NA Not Available NA NA 2002 100 0 2.1 2.1 MW 22 Diesel NA NA Not Available NA Maxim Power Corporation (Not Available / 51% ),
_x000d_Caterpillar Inc. (Not Available / 49% ) Canada,
_x000d_United States Not Applicable N/A No No NA NA OPIC (Not Available / Not Available / $Not Available) Yes No
East Asia and Pacific Cambodia Low income IDA 1999 November Jupiter Power Phnom Penh C1 - Phnom Penh Greenfield project Build, own, and operate Active Energy Electricity Electricity generation Phnom Penh Not Available Not Available Not Available NA Not Available NA NA 1999 100 0 6 6 MW 15 Diesel NA NA Not Available NA Maxim Power Corporation (Not Available / 51% ),
_x000d_Caterpillar Inc. (Not Available / 49% ) Canada,
_x000d_United States Not Applicable N/A No No NA NA OPIC (Not Available / Not Available / $Not Available) Yes No
East Asia and Pacific Cambodia Low income IDA 1999 November Jupiter Power Phnom Penh C1 - Phnom Penh Greenfield project Build, own, and operate Active Energy Electricity Electricity generation Phnom Penh Not Available Not Available Not Available NA Not Available NA NA 2002 100 0 2 2 MW 22 Diesel NA NA Not Available NA Maxim Power Corporation (Not Available / 51% ),
_x000d_Caterpillar Inc. (Not Available / 49% ) Canada,
_x000d_United States Not Applicable N/A No No NA NA OPIC (Not Available / Not Available / $Not Available) Yes No
East Asia and Pacific Cambodia Low income IDA 2001 May Kampong Cham City Diesel Plant NA Greenfield project Build, operate, and transfer Active Energy Electricity Electricity generation Kampong Cham City 25 National Not Available NA Not Available NA NA 2001 100 0 8 8 MW 20 Diesel NA NA Not Available NA Global Technological Support Bhd (Not Available / 100% ) Malaysia Not Applicable N/A No No NA NA EXIM Bank Malaysia (Not Available / Not Available / $Not Available) Yes No
East Asia and Pacific Cambodia Low income IDA 2001 April Kirirom 1 and 3 Hydropower Stations Kirium Hydro Station Project Brownfield Rehabilitate, operate, and transfer Active Energy Electricity Electricity generation Kirirom Area 32 National Not Available NA Not Available NA NA 2001 100 0 20 20 MW 12 Hydro, Small (<50MW) NA NA Direct negotiation NA China Electric Power Technology Import and Export Corp (Not Available / 100% ) China Not Applicable N/A No No NA NA EX-IM Bank of China (Not Available / Not Available / $Not Available) Yes No