Urban Infra Cleaning
#adding and pivoting data before merging all country level in formation in one data frame
gain <- read_csv("00_data_raw/Readiness Vulnerability Index/gain/gain.csv")%>%
pivot_longer(cols = '1995':'2020',
names_to = "year",
values_to = "gain")
readiness <- read_csv("00_data_raw/Readiness Vulnerability Index/readiness/readiness.csv")%>%
pivot_longer(cols = '1995':'2020',
names_to = "year",
values_to = "readiness")
vulnerability <- read_csv("00_data_raw/Readiness Vulnerability Index/vulnerability/vulnerability.csv")%>%
pivot_longer(cols = '1995':'2020',
names_to = "year",
values_to = "vulnerablity")
infrastructure <- read_csv("00_data_raw/Readiness Vulnerability Index/vulnerability/infrastructure.csv")%>%
pivot_longer(cols = '1995':'2020',
names_to = "year",
values_to = "infrastructure")
slum_percentage_urban <- read_csv("00_data_raw/WB_DATA_Slum_Percentage/Slum_Percentage_Urban.csv")%>%
dplyr::select(-"Country Name", -"Series Name", -"Series Code")
names(slum_percentage_urban) <- NULL
names(slum_percentage_urban) <- c("ISO3","1960", "1961", "1962", "1963", "1964" ,"1965" ,"1966", "1967" ,"1968", "1969", "1970", "1971", "1972" ,"1973", "1974" ,"1975", "1976", "1977","1978", "1979", "1980", "1981", "1982", "1983", "1984", "1985", "1986", "1987", "1988" ,"1989", "1990" ,"1991" ,"1992" ,"1993", "1994","1995","1996", "1997", "1998" ,"1999","2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008" ,"2009" ,"2010", "2011","2012" ,"2013","2014" ,"2015" ,"2016" ,"2017", "2018", "2019", "2020", "2021")
slum_percentage_urban <- slum_percentage_urban%>%
pivot_longer(cols = '1960':'2021',
names_to = "year",
values_to = "slum_percent")
urban_population_growth <- read_csv("00_data_raw/urban_population_growth.csv")%>%
dplyr::select(-"Country Name", -"Series Name", -"Series Code")
names(urban_population_growth) <- NULL
names(urban_population_growth) <- c("ISO3","1960", "1961", "1962", "1963", "1964" ,"1965" ,"1966", "1967" ,"1968", "1969", "1970", "1971", "1972" ,"1973", "1974" ,"1975", "1976", "1977","1978", "1979", "1980", "1981", "1982", "1983", "1984", "1985", "1986", "1987", "1988" ,"1989", "1990" ,"1991" ,"1992" ,"1993", "1994","1995","1996", "1997", "1998" ,"1999","2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008" ,"2009" ,"2010", "2011","2012" ,"2013","2014" ,"2015" ,"2016" ,"2017", "2018", "2019", "2020", "2021")
urban_population_growth <- urban_population_growth%>%
pivot_longer(cols = '1960':'2021',
names_to = "year",
values_to = "pop_growth")
urban_population <- read_csv("00_data_raw/urban_population.csv")%>%
dplyr::select(-"Country Name", -"Series Name", -"Series Code")
names(urban_population) <- NULL
names(urban_population) <- c("ISO3","1960", "1961", "1962", "1963", "1964" ,"1965" ,"1966", "1967" ,"1968", "1969", "1970", "1971", "1972" ,"1973", "1974" ,"1975", "1976", "1977","1978", "1979", "1980", "1981", "1982", "1983", "1984", "1985", "1986", "1987", "1988" ,"1989", "1990" ,"1991" ,"1992" ,"1993", "1994","1995","1996", "1997", "1998" ,"1999","2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008" ,"2009" ,"2010", "2011","2012" ,"2013","2014" ,"2015" ,"2016" ,"2017", "2018", "2019", "2020", "2021")
urban_population <- urban_population%>%
pivot_longer(cols = '1960':'2021',
names_to = "year",
values_to = "pop")
urban_water_usage <- read_csv("00_data_raw/urban_water_usage.csv")%>%
dplyr::select(-"Country Name", -"Series Name", -"Series Code")
names(urban_water_usage) <- NULL
names(urban_water_usage) <- c("ISO3","1960", "1961", "1962", "1963", "1964" ,"1965" ,"1966", "1967" ,"1968", "1969", "1970", "1971", "1972" ,"1973", "1974" ,"1975", "1976", "1977","1978", "1979", "1980", "1981", "1982", "1983", "1984", "1985", "1986", "1987", "1988" ,"1989", "1990" ,"1991" ,"1992" ,"1993", "1994","1995","1996", "1997", "1998" ,"1999","2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008" ,"2009" ,"2010", "2011","2012" ,"2013","2014" ,"2015" ,"2016" ,"2017", "2018", "2019", "2020", "2021")
urban_water_usage<- urban_water_usage%>%
pivot_longer(cols = '1960':'2021',
names_to = "year",
values_to = "water_usage")
electricity_access <- read_csv("00_data_raw/electricity_access.csv")%>%
dplyr:: select(-"Country Name", -"Series Name", -"Series Code")
names(electricity_access) <- NULL
names(electricity_access) <- c("ISO3","1960", "1961", "1962", "1963", "1964" ,"1965" ,"1966", "1967" ,"1968", "1969", "1970", "1971", "1972" ,"1973", "1974" ,"1975", "1976", "1977","1978", "1979", "1980", "1981", "1982", "1983", "1984", "1985", "1986", "1987", "1988" ,"1989", "1990" ,"1991" ,"1992" ,"1993", "1994","1995","1996", "1997", "1998" ,"1999","2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008" ,"2009" ,"2010", "2011","2012" ,"2013","2014" ,"2015" ,"2016" ,"2017", "2018", "2019", "2020", "2021")
electricity_access <- electricity_access %>%
pivot_longer(cols = '1960':'2021',
names_to = "year",
values_to = "electricty_acc")
region <- read_excel("~/Documents/R_Docs/Practice/Data_cleaning/POP_TOTAL_WB.xlsx")%>%
dplyr::select(-"SpecialNotes", -"TableName")%>%
rename( 'ISO3' = 'Country Code')
country_climate_fin <- read_excel("00_data_raw/ODL-Export-countries-1680648565747.xlsx")%>%
dplyr::select(-'SIDS', -'LDCs',-'Region')%>%
rename( Name = 'Country Name')
EIU <- read_excel("00_data_raw/EIU.xlsx")%>%
dplyr::select(code, "1996", "1998" ,"2000","2002", "2003", "2004", "2005", "2006", "2007", "2008" ,"2009" ,"2010", "2011","2012" ,"2013","2014" ,"2015" ,"2016" ,"2017", "2018", "2019", "2020", "2021")%>%
pivot_longer(cols = '1996':'2021',
names_to = "year",
values_to = "demo_index")%>%
rename(ISO3 = "code")#merging individual df to country data
country_data<- merge(gain, electricity_access, by = c('ISO3','year'))
country_data<- merge(country_data, region, by = 'ISO3')
country_data<- merge(country_data, urban_water_usage, by = c('ISO3','year'))
country_data<- merge(country_data, urban_population, by = c('ISO3','year'))
country_data<- merge(country_data, urban_population_growth, by = c('ISO3','year'))
country_data<- merge(country_data, infrastructure, by = c('ISO3','year', 'Name'))
country_data<- merge(country_data, readiness, by = c('ISO3','year', 'Name'))
country_data<- merge(country_data, vulnerability, by = c('ISO3','year', 'Name'))
country_data<- merge(country_data, slum_percentage_urban, by = c('ISO3','year'))
country_data<- merge(country_data, country_climate_fin, by = c('ISO3','Name'))
country_data <- country_data %>%
filter (year > 2001)
EIU <- EIU %>%
filter (year > 2001)
country_data<- merge(country_data, EIU, by = c('ISO3', 'year'))
#adding categories to indicate 1 to 3 levels of priority based on index values
country_data <- country_data %>%
filter(pop_growth>0)%>%
mutate(slum_prio = case_when(slum_percent < 30 ~ 1, slum_percent < 49 ~ 2 ,slum_percent > 49 ~ 3),
prio_vul = case_when(vulnerablity <.37 ~ 1, vulnerablity < .49 ~2, vulnerablity > .50 ~ 3),
prio_readiness = case_when(readiness< .32 ~ 3, readiness < .51 ~ 2 , readiness > .52 ~ 1),
EIU_prio = case_when(demo_index < .30 ~ 3, demo_index <.49 ~ 2, demo_index < .50 ~ 1),
prio_grow_pop = case_when(pop_growth < 2 ~ 1, pop_growth < 6 ~ 2, pop_growth < 7 ~ 3))%>%
dplyr::select(ISO3, Name, year, Region, IncomeGroup, slum_percent, slum_prio, vulnerablity, prio_vul, readiness, prio_readiness, demo_index, EIU_prio, pop_growth, pop, prio_grow_pop)
write_csv(country_data,"/Users/sebastiengettelman/Documents/R_Docs/Final Paper/01_data_cleaning/country_data.csv")
kable(head(country_data))| ISO3 | Name | year | Region | IncomeGroup | slum_percent | slum_prio | vulnerablity | prio_vul | readiness | prio_readiness | demo_index | EIU_prio | pop_growth | pop | prio_grow_pop |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AFG | Afghanistan | 2002 | South Asia | Low income | .. | 1 | 0.6072268 | 3 | 0.3229413 | 2 | NA | NA | 6.86345297807244 | 4674867 | 3 |
| AFG | Afghanistan | 2003 | South Asia | Low income | .. | 1 | 0.5944043 | 3 | 0.3270357 | 2 | NA | NA | 7.95344768841497 | 5061866 | NA |
| AFG | Afghanistan | 2004 | South Asia | Low income | .. | 1 | 0.5964283 | 3 | 0.3223744 | 2 | 0.0968750 | 3 | 4.58865325754718 | 5299549 | 2 |
| AFG | Afghanistan | 2005 | South Asia | Low income | .. | 1 | 0.6032747 | 3 | 0.2697370 | 3 | 0.0802083 | 3 | 4.47468958963317 | 5542073 | 2 |
| AFG | Afghanistan | 2006 | South Asia | Low income | 63.6 | 3 | 0.6062524 | 3 | 0.2362718 | 3 | 0.0692500 | 3 | 5.03421600301046 | 5828215 | 2 |
| AFG | Afghanistan | 2007 | South Asia | Low income | .. | 1 | 0.6017442 | 3 | 0.2305408 | 3 | 0.3060000 | 2 | 2.68846842633562 | 5987030 | 2 |
#uploading and renaming variables for infrastructure project data, these data will still need to be cleaned and explored.
infra_projects <- read_excel("00_data_raw/Infra-Projects.xlsx")%>%
rename(Name = "Country")
write_csv(infra_projects,"/Users/sebastiengettelman/Documents/R_Docs/Final Paper/01_data_cleaning/infra-projects.csv")
kable(head(infra_projects))| Region | Name | IncomeGroup | IDA Status | Financial closure year | Financial closure Month | Project name | RelatedNames | Type of PPI | Subtype of PPI | Project status | Primary sector | Subsector | Segment | Location | ContractPeriod | GovtGrantingContract | DirectGovtSupport | DirectGovtSupportValue | InDirectGovtSupport | InDirectGovtSupportValue | Total Equity | InvestmentYear | PercentPrivate | FeesToGovernment | PhysicalAssets | TotalInvestment | CapacityType | Capacity | Technology | RelatedProjects | BidCriteria | AwardMethod | NumberOfBids | Sponsors | Sponsors Country | Main Revenue Source | Other Revenue Source | MultiLateralSupport | BiLateralSupport | TotalDebtFunding | DebtEquityGrantRatio | ProjectBanks | UnsolicitedProposal | PublicDisclosure |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| East Asia and Pacific | Cambodia | Low income | IDA | 1999 | April | Jupiter Power Battambang | NA | Greenfield project | Build, operate, and transfer | Active | Energy | Electricity | Electricity generation | Battambang | 15 | Not Available | Not Available | NA | Not Available | NA | NA | 1999 | 100 | 0 | 10 | 10 | MW | 15 | Diesel | NA | NA | Not Available | NA | Maxim Power Corporation (Not Available / 51% ), | ||||||||||
| _x000d_Caterpillar Inc. (Not Available / 49% ) | Canada, | |||||||||||||||||||||||||||||||||||||||||||
| _x000d_United States | Not Applicable | N/A | No | No | NA | NA | OPIC (Not Available / Not Available / $Not Available) | Yes | No | |||||||||||||||||||||||||||||||||||
| East Asia and Pacific | Cambodia | Low income | IDA | 1999 | April | Jupiter Power Battambang | NA | Greenfield project | Build, operate, and transfer | Active | Energy | Electricity | Electricity generation | Battambang | 15 | Not Available | Not Available | NA | Not Available | NA | NA | 2002 | 100 | 0 | 2.1 | 2.1 | MW | 22 | Diesel | NA | NA | Not Available | NA | Maxim Power Corporation (Not Available / 51% ), | ||||||||||
| _x000d_Caterpillar Inc. (Not Available / 49% ) | Canada, | |||||||||||||||||||||||||||||||||||||||||||
| _x000d_United States | Not Applicable | N/A | No | No | NA | NA | OPIC (Not Available / Not Available / $Not Available) | Yes | No | |||||||||||||||||||||||||||||||||||
| East Asia and Pacific | Cambodia | Low income | IDA | 1999 | November | Jupiter Power Phnom Penh | C1 - Phnom Penh | Greenfield project | Build, own, and operate | Active | Energy | Electricity | Electricity generation | Phnom Penh | Not Available | Not Available | Not Available | NA | Not Available | NA | NA | 1999 | 100 | 0 | 6 | 6 | MW | 15 | Diesel | NA | NA | Not Available | NA | Maxim Power Corporation (Not Available / 51% ), | ||||||||||
| _x000d_Caterpillar Inc. (Not Available / 49% ) | Canada, | |||||||||||||||||||||||||||||||||||||||||||
| _x000d_United States | Not Applicable | N/A | No | No | NA | NA | OPIC (Not Available / Not Available / $Not Available) | Yes | No | |||||||||||||||||||||||||||||||||||
| East Asia and Pacific | Cambodia | Low income | IDA | 1999 | November | Jupiter Power Phnom Penh | C1 - Phnom Penh | Greenfield project | Build, own, and operate | Active | Energy | Electricity | Electricity generation | Phnom Penh | Not Available | Not Available | Not Available | NA | Not Available | NA | NA | 2002 | 100 | 0 | 2 | 2 | MW | 22 | Diesel | NA | NA | Not Available | NA | Maxim Power Corporation (Not Available / 51% ), | ||||||||||
| _x000d_Caterpillar Inc. (Not Available / 49% ) | Canada, | |||||||||||||||||||||||||||||||||||||||||||
| _x000d_United States | Not Applicable | N/A | No | No | NA | NA | OPIC (Not Available / Not Available / $Not Available) | Yes | No | |||||||||||||||||||||||||||||||||||
| East Asia and Pacific | Cambodia | Low income | IDA | 2001 | May | Kampong Cham City Diesel Plant | NA | Greenfield project | Build, operate, and transfer | Active | Energy | Electricity | Electricity generation | Kampong Cham City | 25 | National | Not Available | NA | Not Available | NA | NA | 2001 | 100 | 0 | 8 | 8 | MW | 20 | Diesel | NA | NA | Not Available | NA | Global Technological Support Bhd (Not Available / 100% ) | Malaysia | Not Applicable | N/A | No | No | NA | NA | EXIM Bank Malaysia (Not Available / Not Available / $Not Available) | Yes | No |
| East Asia and Pacific | Cambodia | Low income | IDA | 2001 | April | Kirirom 1 and 3 Hydropower Stations | Kirium Hydro Station Project | Brownfield | Rehabilitate, operate, and transfer | Active | Energy | Electricity | Electricity generation | Kirirom Area | 32 | National | Not Available | NA | Not Available | NA | NA | 2001 | 100 | 0 | 20 | 20 | MW | 12 | Hydro, Small (<50MW) | NA | NA | Direct negotiation | NA | China Electric Power Technology Import and Export Corp (Not Available / 100% ) | China | Not Applicable | N/A | No | No | NA | NA | EX-IM Bank of China (Not Available / Not Available / $Not Available) | Yes | No |