This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
#
# 1| Preparation --------------------------------------------------------
# 1.1| Libraries --------------------------------------------------------
rm(list = ls())
graphics.off()
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 551658 29.5 1232958 65.9 686414 36.7
## Vcells 1066613 8.2 8388608 64.0 1876611 14.4
# Paste start time
startTime = Sys.time()
myPackages = c('broom','caret', 'cluster', 'clValid', 'cobalt', 'colorspace', 'data.table', 'descr', 'dplyr', 'extrafont', 'factoextra', 'FactoMineR', 'fastDummies', 'foreign', 'fpc', 'gbm', 'geosphere', 'ggdendro', 'ggparty', 'ggplot2', 'ggpubr', 'ggspatial', 'ggmap', 'glmnet', 'gridExtra', 'gtools', 'haven', 'here', 'Hmisc', 'igraph', 'Metrics', 'openxlsx', 'partykit', 'PCAmixdata', 'ppcor', 'questionr', 'raster', 'RColorBrewer', 'readr', 'readxl', 'reshape2', 'rpart', 'rpart.plot', 'scales', 'sf', 'shadowtext', 'spatstat', 'stars', 'StatMatch', 'stringr', 'survey', 'tidyr', 'tidyverse', 'treemapify', 'writexl', 'eeptools', 'lubridate', 'lattice', 'sfsmisc')
notInstalled = myPackages[!(myPackages %in% rownames(installed.packages()))]
if(length(notInstalled)) {
install.packages(notInstalled)
}
invisible(sapply(myPackages, library, character.only = TRUE, quietly = TRUE))
## Warning: package 'ggplot2' was built under R version 4.4.2
## cobalt (Version 4.5.5, Build Date: 2024-04-02)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Registering fonts with R
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## Thank you for using fastDummies!
## To acknowledge our work, please cite the package:
## Kaplan, J. & Schlegel, B. (2023). fastDummies: Fast Creation of Dummy (Binary) Columns and Rows from Categorical Variables. Version 1.7.1. URL: https://github.com/jacobkap/fastDummies, https://jacobkap.github.io/fastDummies/.
## Loaded gbm 2.2.2
## This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
## OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
## Loaded glmnet 4.1-8
##
##
## Attaching package: 'gridExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## combine
##
##
##
## Attaching package: 'gtools'
##
##
## The following object is masked from 'package:glmnet':
##
## na.replace
##
##
## here() starts at C:/Users/10282476/OneDrive - United Nations/Desktop/RMarkdwon_test
##
##
## Attaching package: 'Hmisc'
##
##
## The following object is masked from 'package:ggdendro':
##
## label
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
##
## Attaching package: 'igraph'
##
##
## The following object is masked from 'package:gtools':
##
## permute
##
##
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
##
##
## The following object is masked from 'package:clValid':
##
## clusters
##
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
##
## The following object is masked from 'package:base':
##
## union
##
##
##
## Attaching package: 'Metrics'
##
##
## The following objects are masked from 'package:caret':
##
## precision, recall
##
##
##
## Attaching package: 'MASS'
##
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
##
## Attaching package: 'questionr'
##
##
## The following objects are masked from 'package:Hmisc':
##
## describe, wtd.mean, wtd.table, wtd.var
##
##
## The following object is masked from 'package:descr':
##
## freq
## Warning: package 'raster' was built under R version 4.4.2
##
## Attaching package: 'raster'
##
## The following object is masked from 'package:MASS':
##
## select
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
## Attaching package: 'reshape2'
##
## The following objects are masked from 'package:data.table':
##
## dcast, melt
##
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:readr':
##
## col_factor
## Warning: package 'sf' was built under R version 4.4.2
## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
## spatstat.geom 3.2-9
##
## Attaching package: 'spatstat.geom'
##
## The following object is masked from 'package:scales':
##
## rescale
##
## The following objects are masked from 'package:raster':
##
## area, rotate, shift
##
## The following object is masked from 'package:MASS':
##
## area
##
## The following objects are masked from 'package:igraph':
##
## diameter, edges, is.connected, vertices
##
## The following objects are masked from 'package:ggpubr':
##
## border, rotate
##
## The following object is masked from 'package:grid':
##
## as.mask
##
## The following object is masked from 'package:geosphere':
##
## perimeter
##
## The following object is masked from 'package:data.table':
##
## shift
##
## The following object is masked from 'package:colorspace':
##
## coords
##
## The following object is masked from 'package:cluster':
##
## volume
##
## spatstat.random 3.2-3
##
## Attaching package: 'nlme'
##
## The following object is masked from 'package:raster':
##
## getData
##
## The following object is masked from 'package:dplyr':
##
## collapse
##
## spatstat.explore 3.2-7
##
## Attaching package: 'spatstat.explore'
##
## The following object is masked from 'package:Metrics':
##
## auc
##
## The following object is masked from 'package:lattice':
##
## panel.histogram
##
## spatstat.model 3.2-11
##
## Attaching package: 'spatstat.model'
##
## The following object is masked from 'package:lattice':
##
## panel.histogram
##
## spatstat.linnet 3.1-5
##
## spatstat 3.0-8
## For an introduction to spatstat, type 'beginner'
##
##
## Attaching package: 'proxy'
##
## The following object is masked from 'package:raster':
##
## as.matrix
##
## The following object is masked from 'package:Matrix':
##
## as.matrix
##
## The following objects are masked from 'package:stats':
##
## as.dist, dist
##
## The following object is masked from 'package:base':
##
## as.matrix
##
##
## Attaching package: 'survival'
##
## The following object is masked from 'package:caret':
##
## cluster
##
##
## Attaching package: 'survey'
##
## The following object is masked from 'package:raster':
##
## cv
##
## The following object is masked from 'package:Hmisc':
##
## deff
##
## The following object is masked from 'package:graphics':
##
## dotchart
##
##
## Attaching package: 'tidyr'
##
## The following object is masked from 'package:reshape2':
##
## smiths
##
## The following object is masked from 'package:raster':
##
## extract
##
## The following object is masked from 'package:igraph':
##
## crossing
##
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
##
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ purrr 1.0.2
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%--%() masks igraph::%--%()
## ✖ tibble::as_data_frame() masks igraph::as_data_frame(), dplyr::as_data_frame()
## ✖ dplyr::between() masks data.table::between()
## ✖ scales::col_factor() masks readr::col_factor()
## ✖ nlme::collapse() masks dplyr::collapse()
## ✖ gridExtra::combine() masks dplyr::combine()
## ✖ purrr::compose() masks igraph::compose()
## ✖ tidyr::crossing() masks igraph::crossing()
## ✖ purrr::discard() masks scales::discard()
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ tidyr::extract() masks raster::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ purrr::lift() masks caret::lift()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ lubridate::second() masks data.table::second()
## ✖ raster::select() masks MASS::select(), dplyr::select()
## ✖ purrr::simplify() masks igraph::simplify()
## ✖ Hmisc::src() masks dplyr::src()
## ✖ Hmisc::summarize() masks dplyr::summarize()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'eeptools' was built under R version 4.4.2
## Warning: package 'sfsmisc' was built under R version 4.4.2
##
## Attaching package: 'sfsmisc'
##
## The following object is masked from 'package:spatstat.geom':
##
## xy.grid
##
## The following object is masked from 'package:Hmisc':
##
## errbar
##
## The following object is masked from 'package:dplyr':
##
## last
##
## The following object is masked from 'package:data.table':
##
## last
loadfonts(device = 'win', quiet = T)
options(scipen = 999) # Disable scientific notation.
# 1.2| Initial values locations and folders ---------------------------------------------------
# 1: English.
# 0: Another.
language = 1
# Specify location of file
userLocation = enc2native(here()) # Replace by your own path.
# Create output folders if not available to save the output in
if(!file.exists("Output")){
dir.create("Output")
}
if(!file.exists("Output/Chapter 05")){
dir.create("Output/Chapter 05")
}
if(!file.exists("Output/Profiling")){
dir.create("Output/Profiling")
}
if(!file.exists("Output/2")){
dir.create("Output/Chapter 02")
}
## Warning in dir.create("Output/Chapter 02"): 'Output\Chapter 02' already exists
if(!file.exists("Output/Chapter 03")){
dir.create("Output/Chapter 03")
}
# Location of Input and Code
scriptLocation = paste0(userLocation, '/Code/')
inputLocation = paste0(userLocation, '/Input/')
#Call output location for this chapter
outputLocation = paste0(userLocation, '/Output/Chapter 05/')
# Paste end time
endTime = Sys.time()
endTime - startTime
## Time difference of 22.94405 secs
st = Sys.time()
outputLocation = paste0(userLocation, '/Output/Chapter 05/')
##1.3 Uploading main datasets: Individual, Household Data, and beneficiaries ---------------------------------------------------
IndividualData=read_excel(paste0(inputLocation, 'Bases/DATA_BASES.xlsx'), sheet = 'Individuals',na = "NULL")
HH_Data=read_excel(paste0(inputLocation, 'Bases/DATA_BASES.xlsx'), sheet = 'Households',na = "NULL")
IndividualData = IndividualData %>%
mutate(`Schooling level`=if_else(`Schooling level`%in%c("illiterat"), "illiterate",`Schooling level`))
IndividualData = IndividualData %>%
mutate(Age = trunc((as.Date(`Date of birth`) %--% as.Date(Sys.Date()) / years(1) )))
Beneficiaries_Data=read.csv(paste0(inputLocation, 'Bases/Beneficiaries.csv'), na = "NULL")
# 1.4 Uploading dictionary - Labels ---------------------------------------------------
dataPlots <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'),
sheet = 'Labels')
dataPlots[is.na(dataPlots)] <- '' #Replacing NAs with characters
# 1.5| Uploading Dictionary -labels------------------------------------------------------
#Referring each graph to its specific labels in the dictionary
IndVarNames <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'A8:D33'))
HHVarNames <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'F8:I51'))
Benificiaries_VarNames<- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'P14:S23'))
GenderLabels <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'A1:D3'))
MarriageLabel<- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'U14:X18'))
YesNoLabels <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'F1:I3'))
EducationAttainment <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'K1:N5'))
HighestEducation <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'P1:S9'))
EmploymentStatus <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'U1:X4'))
EmploymentType <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'Z1:AC7'))
EmploymentRegularity <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'AE1:AH5'))
EmploymentSector <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'AJ1:AM4'))
DwellingType <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'AO1:AR12')) #from to
DwellingOwnership <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'AT1:AW5')) #from to
DwellingWalls <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'AY1:BB7')) #from to
DamageLevel <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'BD1:BG4')) #from to
DwellingRoof <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'BI1:BL6')) #from to
DwellingFloor <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'BN1:BQ6')) #from to
CookingFuel <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'BS1:BV5')) #from to
ToiletType <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'BX1:CA6')) #from to
WaterSource <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'CC1:CF6')) #from to
ElectricitySource <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'CH1:CK4')) #from to
SewageWater <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'CM1:CP4')) #from to
HHProperties <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'CR1:CU3')) #from to
AgriculturalLand <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'CW1:CZ5')) #from to
LocationLabels <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'DB1:DE4')) #from to
GovernorateLabels <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'DG1:DJ9')) #from to
## 1.6 Uploading Dictionary - Weights
DwellingTypeWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'A2:C46') %>% #from to
rename(`House type`=Dwelling_type, `Contract type`= Dwelling_ownership, var_Dwelling_type=weight)
WallTypeWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'E2:G20')%>% #from to
rename(`Wall material`=Wall_type, `Wall quality`= Damage_level, var_Wall_type=weight)
RoofTypeWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'I2:K17')%>% #from to
rename(`Roof material`=Roof_type, `Roof quality`= Damage_level, var_Roof_type=weight)
FloorTypeWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'M2:O17')%>% #from to
rename(`Floor material`=Floor_type, `Floor quality`= Damage_level, var_Floor_type=weight)
CookingfuelWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'Q2:R6')%>% #from to
rename(`Cooking source`=Cooking_fuel, var_Cooking_fuel=weight)
ToiletTypeWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'T2:U7')%>% #from to
rename(`Toilet type`=Toilet_type, var_Toilet_Type=weight)
WaterWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'W2:X7')%>% #from to
rename(`Water`=Water_source, var_Drinking_water=weight)
DrinkingWaterWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'W9:X14')%>% #from to
rename(`Drinking water`=Drinking_water_source, var_water=weight)
SewageWaterWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'Z9:AA12')%>% #from to
rename(`Sewers`=Sewage_water, var_sewage=weight)
ElectricitySourceWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'Z2:AA5')%>% #from to
rename(`Electricity source`=Electricity_source, var_electricity=weight)
LivestockWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'AC2:AE8')%>% #from to
rename(`Livestock`=Livestock,`Location`= Location, var_livestock=weight)
ReaslestateWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'AG2:AI8')%>% #from to
rename(`Realstate`=Realestate, `Location`= Location, var_Realestate=weight)
AgriLandWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'AK2:AM14')%>% #from to
rename(`Agriculture land`=Agricultural_land,`Location`= Location, var_AgriLand=weight)
# 1.7 Update names of the columns of main datasets
colnames(IndividualData)=as.character(factor(colnames(IndividualData),
levels = IndVarNames[,2],
labels = IndVarNames[,4-1]))
colnames(HH_Data)=as.character(factor(colnames(HH_Data),
levels = HHVarNames[,2],
labels = HHVarNames[,4-1]))
colnames(Beneficiaries_Data)=as.character(factor(colnames(Beneficiaries_Data),
levels = Benificiaries_VarNames[,2],
labels = Benificiaries_VarNames[,4-1]))
HH_Data=HH_Data[,!is.na(colnames(HH_Data))]
# 1.8 Convert categorical variables of individuals dataset into factors.
# Also, in case another language wants to be used as a label for the data, this code will update to the language selected (in this case, language 1 is English, 0 is Arabic).
IndividualData$`Gender`=as.character(factor(IndividualData$`Gender`,
levels = GenderLabels[,2],
labels = GenderLabels[,4-language]))
IndividualData$`School attendance`=as.character(factor(IndividualData$`School attendance`,
levels = EducationAttainment[,2],
labels = EducationAttainment[,4-language]))
IndividualData$`Schooling level`=as.character(factor(IndividualData$`Schooling level`,
levels = HighestEducation[,2],
labels = HighestEducation[,4-language]))
IndividualData$`Marital status`=as.character(factor(IndividualData$`Marital status`,
levels = MarriageLabel[,2],
labels = MarriageLabel[,4-language]))
IndividualData$Age=as.numeric(IndividualData$Age)
IndividualData$`Chronic disease`=as.character(factor(IndividualData$`Chronic disease`,
levels = YesNoLabels[,2],
labels = YesNoLabels[,4-language]))
IndividualData$`Nutrition Disease`=as.character(factor(IndividualData$`Nutrition Disease`,
levels = YesNoLabels[,2],
labels = YesNoLabels[,4-language]))
IndividualData$`Common Disease`=as.character(factor(IndividualData$`Common Disease`,
levels = YesNoLabels[,2],
labels = YesNoLabels[,4-language]))
IndividualData$`Total disability`=as.character(factor(IndividualData$`Total disability`,
levels = YesNoLabels[,2],
labels = YesNoLabels[,4-language]))
IndividualData$`Partial disability`=as.character(factor(IndividualData$`Partial disability`,
levels = YesNoLabels[,2],
labels = YesNoLabels[,4-language]))
IndividualData$`Other disability`=as.character(factor(IndividualData$`Other disability`,
levels = YesNoLabels[,2],
labels = YesNoLabels[,4-language]))
IndividualData$`Polio`=as.character(factor(IndividualData$`Polio`,
levels = YesNoLabels[,2],
labels = YesNoLabels[,4-language]))
IndividualData$`Measels`=as.character(factor(IndividualData$`Measels`,
levels = YesNoLabels[,2],
labels = YesNoLabels[,4-language]))
IndividualData$`Diphtheria`=as.character(factor(IndividualData$`Diphtheria`,
levels = YesNoLabels[,2],
labels = YesNoLabels[,4-language]))
IndividualData$`Health insurance`=as.character(factor(IndividualData$`Health insurance`,
levels = YesNoLabels[,2],
labels = YesNoLabels[,4-language]))
IndividualData$`Employment Status`=as.character(factor(IndividualData$`Employment Status`,
levels = EmploymentStatus[,2],
labels = EmploymentStatus[,4-language]))
IndividualData$`Job Type`=as.character(factor(IndividualData$`Job Type`,
levels = EmploymentType[,2],
labels = EmploymentType[,4-language]))
IndividualData$`Contract type`=as.character(factor(IndividualData$`Contract type`,
levels = EmploymentRegularity[,2],
labels = EmploymentRegularity[,4-language]))
IndividualData$`Sector`=as.character(factor(IndividualData$`Sector`,
levels = EmploymentSector[,2],
labels = EmploymentSector[,4-language]))
#1.9 Creation of Relevant Variables from Individual to Household Datasets
t1=IndividualData%>%group_by(HHID)%>%
mutate(hhSize=length(HHID),
var_insurance=if_else(`Health insurance`%in%c("No"),1,0),
var_chronic=if_else(`Chronic disease`%in%c("Yes"),1,0),
var_nutrition=if_else(`Nutrition Disease`%in%c("Yes"),1,0),
var_common=if_else(`Common Disease`%in%c("Yes"),1,0),
var_tot_disab=if_else(`Total disability`%in%c("Yes"),1,0),
var_part_disab=if_else(`Partial disability`%in%c("Yes"),1,0),
var_other_disab=if_else(`Other disability`%in%c("Yes"),1,0),
var_polio=if_else(`Polio`%in%c("Yes"),1,0),
var_diphtheria=if_else(`Diphtheria`%in%c("Yes"),1,0),
var_measels=if_else(`Measels` %in%c("Yes"), 1, 0),
#Age=trunc((as.Date(`Date of birth`) %--% as.Date("2023-08-28")) / years(1)),
school_attendance=if_else(Age>6 & `School attendance`%in%c("was enrolled and dropped out","haven't been enrolled before"),1,0),
members_15above=if_else(Age>15 ,1,0),
illiterate=if_else(Age>15 & `Schooling level`%in%c("illiterate"),1,0),
high_education=if_else(Age>18 & `Schooling level`%in%c("post-secondary diploma","university and above","high-school"),1,0),
kids13_below_enrolled=if_else(Age<13 & `School attendance`%in%c("currently enrolled"),1,0),
kids13_below=if_else(Age<13 ,1,0),
inactive=if_else(Age>15 & Age <25 & `Employment Status`%in%c("inactive") & `School attendance`%in%c("was enrolled and dropped out","haven't been enrolled before"),1,0),
members15_25= if_else(Age>15 & Age <25,1,0),
members18_65= if_else(Age>18 & Age <65,1,0),
econ_dependency=if_else(`Job Type`%in%c("paid in cash")|`Job Type`%in%c("paid both cash and in-kind"),1,0),
workers18_65=if_else((`Employment Status`%in%c("employed") & Age>18 & Age<65),1,0),
workers=if_else((`Employment Status`%in%c("employed")),1,0),
unemployment= if_else((`Employment Status`%in%c("unemployed") & Age>18 & Age<65),1,0),
temporary=if_else(`Contract type`%in%c("temporary","seasonal"),1,0),
child_labour=if_else(`Employment Status`%in%c("employed") & Age<18,1,0),
members18_below=if_else(Age<18,1,0),
female=if_else(`Gender`%in%c("Female"),1,0),
old_dependency= if_else(Age>65,1,0),
young_dependency=if_else(Age<18,1,0)
)%>%
summarise(hhSize=length(HHID),
var_insurance=100*mean(var_insurance, rm.na=F),
var_chronic=100*mean(var_chronic, rm.na=F),
var_nutrition=100*mean(var_nutrition, rm.na=F),
var_common=100*mean(var_common, rm.na=F),
var_tot_disab=100*mean(var_tot_disab, rm.na=F),
var_part_disab=100*mean(var_part_disab, rm.na=F),
var_other_disab=100*mean(var_other_disab, rm.na=F),
var_polio=100*mean(var_polio, rm.na=F),
var_diphtheria=100*mean(var_diphtheria, rm.na=F),
var_measels=100*mean(var_measels, rm.na=F),
var_enrolment=100*mean(school_attendance, rm.na=F),
var_illiteracy_rate=100*sum(illiterate)/(sum(members_15above)),
var_high_education_rate=100 - 100*mean(high_education, rm.na=F),
var_kids_outside_school=100-100*(sum(kids13_below_enrolled))/(sum(kids13_below)),
var_NEET=100*sum(inactive)/sum(members15_25),
var_econ_dependency=100-(100*mean(econ_dependency)),
var_unemployment_rate= 100*sum(unemployment)/(sum(unemployment)+sum(workers18_65)),
var_temporary_rate=100*sum(temporary)/(sum(workers)),
var_child_labour=100*sum(child_labour)/(sum(members18_below)),
var_female_ratio=100*mean(female),
var_old_dependency_ratio=(100*sum(old_dependency)/sum(members18_65))/2,
var_young_dependency_ratio=(100*sum(young_dependency)/sum(members18_65))/3) %>%
mutate(
var_illiteracy_rate=if_else(is.nan(var_illiteracy_rate),100,var_illiteracy_rate),
var_kids_outside_school=if_else(is.nan(var_kids_outside_school),0,var_kids_outside_school),
var_NEET=if_else(is.nan(var_NEET),0,var_NEET),
var_unemployment_rate=if_else(is.nan(var_unemployment_rate),100,var_unemployment_rate),
var_temporary_rate=if_else(is.nan(var_temporary_rate),100,var_temporary_rate),
var_child_labour=if_else(is.nan(var_child_labour),0,var_child_labour),
var_female_ratio=if_else(is.nan(var_female_ratio),100,var_female_ratio),
var_old_dependency_ratio=if_else(is.nan(var_old_dependency_ratio) | var_old_dependency_ratio>100,100,var_old_dependency_ratio),
var_young_dependency_ratio=if_else(is.nan(var_young_dependency_ratio) | var_young_dependency_ratio>100,100,var_young_dependency_ratio),
)
#### 1.10 Repeat 18. for Households dataset dataset
HH_Data$`Household members`=as.numeric(HH_Data$`Household members`)
HH_Data$`Number of rooms`=as.numeric(HH_Data$`Number of rooms`)
HH_Data$`House type`=as.character(factor(HH_Data$`House type`,
levels = DwellingType[,2],
labels = DwellingType[,4-language]))
HH_Data$`Contract type`=as.character(factor(HH_Data$`Contract type`,
levels = DwellingOwnership[,2],
labels = DwellingOwnership[,4-language]))
HH_Data$`Wall material`=as.character(factor(HH_Data$`Wall material`,
levels = DwellingWalls[,2],
labels = DwellingWalls[,4-language]))
HH_Data$`Wall quality`=as.character(factor(HH_Data$`Wall quality`,
levels = DamageLevel[,2],
labels = DamageLevel[,4-language]))
HH_Data$`Roof material`=as.character(factor(HH_Data$`Roof material`,
levels = DwellingRoof[,2],
labels = DwellingRoof[,4-language]))
HH_Data$`Roof quality`=as.character(factor(HH_Data$`Roof quality`,
levels = DamageLevel[,2],
labels = DamageLevel[,4-language]))
HH_Data$`Floor material`=as.character(factor(HH_Data$`Floor material`,
levels = DwellingFloor[,2],
labels = DwellingFloor[,4-language]))
HH_Data$`Floor quality`=as.character(factor(HH_Data$`Floor quality`,
levels = DamageLevel[,2],
labels = DamageLevel[,4-language]))
HH_Data$`Cooking source`=as.character(factor(HH_Data$`Cooking source`,
levels = CookingFuel[,2],
labels = CookingFuel[,4-language]))
HH_Data$`Toilet type`=as.character(factor(HH_Data$`Toilet type`,
levels = ToiletType[,2],
labels = ToiletType[,4-language]))
HH_Data$`Drinking water`=as.character(factor(HH_Data$`Drinking water`,
levels = WaterSource[,2],
labels = WaterSource[,4-language]))
HH_Data$`Water`=as.character(factor(HH_Data$`Water`,
levels = WaterSource[,2],
labels = WaterSource[,4-language]))
HH_Data$`Electricity source`=as.character(factor(HH_Data$`Electricity source`,
levels = ElectricitySource[,2],
labels = ElectricitySource[,4-language]))
HH_Data$`Sewers`=as.character(factor(HH_Data$`Sewers`,
levels = SewageWater[,2],
labels = SewageWater[,4-language]))
HH_Data$`Time to the market`=as.numeric(HH_Data$`Time to the market`)
HH_Data$`Time to school`=as.numeric(HH_Data$`Time to school`)
HH_Data$`Time to hospital`=as.numeric(HH_Data$`Time to hospital`)
HH_Data$`Kitchen`=as.character(factor(HH_Data$`Kitchen`,
levels = HHProperties[,2],
labels = HHProperties[,4-language]))
HH_Data$`Internet`=as.character(factor(HH_Data$`Internet`,
levels = HHProperties[,2],
labels = HHProperties[,4-language]))
HH_Data$`Heater`=as.character(factor(HH_Data$`Heater`,
levels = HHProperties[,2],
labels = HHProperties[,4-language]))
HH_Data$`Sewing Machine`=as.character(factor(HH_Data$`Sewing Machine`,
levels = HHProperties[,2],
labels = HHProperties[,4-language]))
HH_Data$`Solar Panel`=as.character(factor(HH_Data$`Solar Panel`,
levels = HHProperties[,2],
labels = HHProperties[,4-language]))
HH_Data$`TV`=as.numeric(HH_Data$`TV`)
HH_Data$`Mobile phone`=as.numeric(HH_Data$`Mobile phone`)
HH_Data$`Car`=as.numeric(HH_Data$`Car`)
HH_Data$`Fridge`=as.numeric(HH_Data$`Fridge`)
HH_Data$`Washing machine`=as.numeric(HH_Data$`Washing machine`)
HH_Data$`AC`=as.numeric(HH_Data$`AC`)
HH_Data$`Computer`=as.numeric(HH_Data$`Computer`)
HH_Data$`Bike`=as.numeric(HH_Data$`Bike`)
HH_Data$`Automatic washing machine`=as.numeric(HH_Data$`Automatic washing machine`)
HH_Data$`Livestock`=as.character(factor(HH_Data$`Livestock`,
levels = YesNoLabels[,2],
labels = YesNoLabels[,4-language]))
HH_Data$`Location`=as.character(factor(HH_Data$`Location`,
levels = LocationLabels[,2],
labels = LocationLabels[,4-language]))
HH_Data$`Realstate`=as.character(factor(HH_Data$`Realstate`,
levels = YesNoLabels[,2],
labels = YesNoLabels[,4-language]))
HH_Data$`Agriculture land`=as.character(factor(HH_Data$`Agriculture land`,
levels = AgriculturalLand[,2],
labels = AgriculturalLand[,4-language]))
HH_Data$`Governorate`=as.character(factor(HH_Data$`Governorate`,
levels = GovernorateLabels[,2],
labels = GovernorateLabels[,4-language]))
HH_Data$`Location`=as.character(factor(HH_Data$`Location`,
levels = LocationLabels[,2],
labels = LocationLabels[,4-language]))
Indicators=HH_Data%>%
left_join(DwellingTypeWeights) %>%
left_join(WallTypeWeights) %>%
left_join(FloorTypeWeights) %>%
left_join(RoofTypeWeights) %>%
left_join(CookingfuelWeights) %>%
left_join(ToiletTypeWeights) %>%
left_join(WaterWeights) %>%
left_join(DrinkingWaterWeights) %>%
left_join(ElectricitySourceWeights) %>%
left_join(SewageWaterWeights) %>%
left_join(LivestockWeights) %>%
left_join(ReaslestateWeights) %>%
left_join(AgriLandWeights) %>%
mutate(peoplePerRoom=`Household members`/`Number of rooms`,
var_overcrowdingIssue=if_else(peoplePerRoom>3,1,0),
var_duration_market=if_else(`Time to the market`>60,100,0),
var_duration_school=if_else(`Time to school`>60,100,0),
var_duration_hospital=if_else(`Time to hospital`>60,100,0),
kitchen=if_else(`Kitchen`%in%c("Yes"),0,100),
television=if_else(`TV`>0,0,90),
phone=if_else(`Mobile phone`>0,0,90),
carr=if_else(`Car`>0,0,80),
fridge=if_else(`Fridge`>0,0,70),
washing_machine=if_else(`Washing machine`>0,0,60),
AC=if_else(`AC`>0,0,60),
computer=if_else(`Computer`>0,0,50),
internet=if_else(`Internet`%in%c("Yes"),0,50),
heater=if_else(`Heater`%in%c("Yes"),0,50),
sewing_machine=if_else(`Sewing Machine`%in%c("Yes"),0,40),
solar=if_else(`Solar Panel`%in%c("Yes"),0,10),
bike=if_else(`Bike`>0,0,10),
auto_wash_machine=if_else(`Automatic washing machine`>0,0,10),
var_Property_score= 100*(kitchen+television+phone+carr+fridge+washing_machine+AC+computer+internet+heater+sewing_machine+solar+bike+auto_wash_machine)/770,
)
## Joining with `by = join_by(`House type`, `Contract type`)`
## Joining with `by = join_by(`Wall material`, `Wall quality`)`
## Joining with `by = join_by(`Floor material`, `Floor quality`)`
## Joining with `by = join_by(`Roof material`, `Roof quality`)`
## Joining with `by = join_by(`Cooking source`)`
## Joining with `by = join_by(`Toilet type`)`
## Joining with `by = join_by(Water)`
## Joining with `by = join_by(`Drinking water`)`
## Joining with `by = join_by(`Electricity source`)`
## Joining with `by = join_by(Sewers)`
## Joining with `by = join_by(Livestock, Location)`
## Joining with `by = join_by(Realstate, Location)`
## Joining with `by = join_by(`Agriculture land`, Location)`
#### COMBINE ####
completeData= Indicators %>%
left_join(t1) %>%
dplyr::select(c("HHID"), starts_with("var_"))
## Joining with `by = join_by(HHID)`
completeData_Vuln= Indicators %>%
left_join(t1) %>%
dplyr::select(c("HHID", "Governorate", "Location"), starts_with("var_"))
## Joining with `by = join_by(HHID)`
#selecting variables we need
PMTIndicators=completeData %>%
dplyr::select(c("HHID"), "var_overcrowdingIssue", "var_Wall_type","var_Floor_type", "var_Toilet_Type", "var_Drinking_water", "var_sewage",
"var_Property_score", "var_AgriLand",
"var_chronic","var_common", "var_measels", "var_tot_disab", "var_other_disab", "var_duration_hospital",
"var_enrolment", "var_high_education_rate", "var_duration_school",
"var_NEET", "var_unemployment_rate", "var_child_labour",
"var_female_ratio", "var_young_dependency_ratio")
# 2.3 Calculate the score
SCORING = PMTIndicators %>%
mutate(
s_var_overcrowdingIssue=(1/6)*(1/7)*var_overcrowdingIssue, # the value next to each variable is its weight. in this case we specified the weight but in other cases we have to stick to the weight given
s_var_Wall=(1/6)*(1/7)*var_Wall_type,
s_var_Floor=(1/6)*(1/7)*var_Floor_type,
s_var_Toilet=(1/6)*(1/7)*var_Toilet_Type,
s_var_DrinkWater=(1/6)*(1/7)*var_Drinking_water,
s_var_Sewage=(1/6)*(1/7)*var_sewage,
s_var_Property=(1/2)*(1/7)*var_Property_score,
s_var_agriLand=(1/2)*(1/7)*var_AgriLand,
s_var_chronic=(1/6)*(1/7)*var_chronic,
s_var_common=(1/6)*(1/7)*var_common,
s_var_measels=(1/6)*(1/7)*var_measels,
s_var_totdisab=(1/6)*(1/7)*var_tot_disab,
s_var_otherdisab=(1/6)*(1/7)*var_other_disab,
s_var_durationHospital=(1/6)*(1/7)*var_duration_hospital,
s_var_enrolment=(1/3)*(1/7)*var_enrolment,
s_var_higheducation=(1/3)*(1/7)*var_high_education_rate,
s_var_durationSchool=(1/3)*(1/7)*var_duration_school,
s_var_NEET=(1/3)*(1/7)*var_NEET,
s_var_unemp=(1/3)*(1/7)*var_unemployment_rate,
s_var_childLabour=(1/3)*(1/7)*var_child_labour,
s_var_female=(1/2)*(1/7)*var_female_ratio,
s_var_youngdependency=(1/2)*(1/7)*var_young_dependency_ratio,
score = s_var_overcrowdingIssue + s_var_Wall + s_var_Floor + s_var_Toilet + s_var_DrinkWater + s_var_Sewage +
s_var_Property + s_var_agriLand + s_var_chronic + s_var_common + s_var_measels + s_var_totdisab + s_var_otherdisab +
s_var_durationHospital + s_var_enrolment+ s_var_higheducation + s_var_durationSchool + s_var_NEET + s_var_unemp +
s_var_childLabour +s_var_female + s_var_youngdependency )%>%
drop_na()
#2.4 Add the new SCORING variable to the different datasets
#important note is to consider the head of the household not the whole family
filtered_Head = IndividualData%>%filter(`Relationship with HH`%in% c(1)) %>%
dplyr::select(c("HHID", "Gender", "Marital status", "Schooling level", "Employment Status"))
completeData_Vuln= completeData_Vuln %>%
left_join(SCORING) %>%
dplyr::select(c("HHID", "Governorate", "Location","score"), starts_with("var_"))
## Joining with `by = join_by(HHID, var_Wall_type, var_Floor_type,
## var_Toilet_Type, var_Drinking_water, var_sewage, var_AgriLand,
## var_overcrowdingIssue, var_duration_school, var_duration_hospital,
## var_Property_score, var_chronic, var_common, var_tot_disab, var_other_disab,
## var_measels, var_enrolment, var_high_education_rate, var_NEET,
## var_unemployment_rate, var_child_labour, var_female_ratio,
## var_young_dependency_ratio)`
completeData_Vuln= completeData_Vuln %>%
left_join(filtered_Head) %>%
dplyr::select(c("HHID", "Governorate", "Location","score", "Gender", "Marital status", "Schooling level", "Employment Status"), starts_with("var_"))
## Joining with `by = join_by(HHID)`
wb= openxlsx::createWorkbook(creator='ESCWA')
newSheet = addWorksheet(wb, sheetName = "Age vs. HH members")
MergedData= merge(IndividualData,HH_Data,by="HHID")