#CHAPTER 05 AT THE MOMENT!! CHANGE TITLE EVNTUALLY

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

#
# 1| Preparation --------------------------------------------------------
# 1.1| Libraries --------------------------------------------------------

rm(list = ls())
graphics.off()
gc()
##           used (Mb) gc trigger (Mb) max used (Mb)
## Ncells  551658 29.5    1232958 65.9   686414 36.7
## Vcells 1066613  8.2    8388608 64.0  1876611 14.4
# Paste start time
startTime  = Sys.time()

myPackages   = c('broom','caret', 'cluster', 'clValid', 'cobalt', 'colorspace', 'data.table', 'descr', 'dplyr', 'extrafont', 'factoextra', 'FactoMineR', 'fastDummies', 'foreign', 'fpc', 'gbm', 'geosphere', 'ggdendro', 'ggparty', 'ggplot2', 'ggpubr', 'ggspatial', 'ggmap', 'glmnet', 'gridExtra', 'gtools', 'haven', 'here', 'Hmisc', 'igraph', 'Metrics', 'openxlsx', 'partykit', 'PCAmixdata', 'ppcor', 'questionr', 'raster', 'RColorBrewer', 'readr', 'readxl', 'reshape2', 'rpart', 'rpart.plot', 'scales', 'sf', 'shadowtext', 'spatstat', 'stars', 'StatMatch', 'stringr', 'survey', 'tidyr', 'tidyverse', 'treemapify', 'writexl', 'eeptools', 'lubridate', 'lattice', 'sfsmisc')

notInstalled = myPackages[!(myPackages  %in%  rownames(installed.packages()))]
if(length(notInstalled)) {
  install.packages(notInstalled)
}
invisible(sapply(myPackages, library, character.only = TRUE, quietly = TRUE))
## Warning: package 'ggplot2' was built under R version 4.4.2
##  cobalt (Version 4.5.5, Build Date: 2024-04-02)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Registering fonts with R
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## Thank you for using fastDummies!
## To acknowledge our work, please cite the package:
## Kaplan, J. & Schlegel, B. (2023). fastDummies: Fast Creation of Dummy (Binary) Columns and Rows from Categorical Variables. Version 1.7.1. URL: https://github.com/jacobkap/fastDummies, https://jacobkap.github.io/fastDummies/.
## Loaded gbm 2.2.2
## This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
##   Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
##   OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
## Loaded glmnet 4.1-8
## 
## 
## Attaching package: 'gridExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## 
## 
## Attaching package: 'gtools'
## 
## 
## The following object is masked from 'package:glmnet':
## 
##     na.replace
## 
## 
## here() starts at C:/Users/10282476/OneDrive - United Nations/Desktop/RMarkdwon_test
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following object is masked from 'package:ggdendro':
## 
##     label
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## 
## Attaching package: 'igraph'
## 
## 
## The following object is masked from 'package:gtools':
## 
##     permute
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## 
## 
## The following object is masked from 'package:clValid':
## 
##     clusters
## 
## 
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## 
## 
## The following object is masked from 'package:base':
## 
##     union
## 
## 
## 
## Attaching package: 'Metrics'
## 
## 
## The following objects are masked from 'package:caret':
## 
##     precision, recall
## 
## 
## 
## Attaching package: 'MASS'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## 
## Attaching package: 'questionr'
## 
## 
## The following objects are masked from 'package:Hmisc':
## 
##     describe, wtd.mean, wtd.table, wtd.var
## 
## 
## The following object is masked from 'package:descr':
## 
##     freq
## Warning: package 'raster' was built under R version 4.4.2
## 
## Attaching package: 'raster'
## 
## The following object is masked from 'package:MASS':
## 
##     select
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## Attaching package: 'reshape2'
## 
## The following objects are masked from 'package:data.table':
## 
##     dcast, melt
## 
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
## Warning: package 'sf' was built under R version 4.4.2
## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
## spatstat.geom 3.2-9
## 
## Attaching package: 'spatstat.geom'
## 
## The following object is masked from 'package:scales':
## 
##     rescale
## 
## The following objects are masked from 'package:raster':
## 
##     area, rotate, shift
## 
## The following object is masked from 'package:MASS':
## 
##     area
## 
## The following objects are masked from 'package:igraph':
## 
##     diameter, edges, is.connected, vertices
## 
## The following objects are masked from 'package:ggpubr':
## 
##     border, rotate
## 
## The following object is masked from 'package:grid':
## 
##     as.mask
## 
## The following object is masked from 'package:geosphere':
## 
##     perimeter
## 
## The following object is masked from 'package:data.table':
## 
##     shift
## 
## The following object is masked from 'package:colorspace':
## 
##     coords
## 
## The following object is masked from 'package:cluster':
## 
##     volume
## 
## spatstat.random 3.2-3
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:raster':
## 
##     getData
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse
## 
## spatstat.explore 3.2-7
## 
## Attaching package: 'spatstat.explore'
## 
## The following object is masked from 'package:Metrics':
## 
##     auc
## 
## The following object is masked from 'package:lattice':
## 
##     panel.histogram
## 
## spatstat.model 3.2-11
## 
## Attaching package: 'spatstat.model'
## 
## The following object is masked from 'package:lattice':
## 
##     panel.histogram
## 
## spatstat.linnet 3.1-5
## 
## spatstat 3.0-8 
## For an introduction to spatstat, type 'beginner' 
## 
## 
## Attaching package: 'proxy'
## 
## The following object is masked from 'package:raster':
## 
##     as.matrix
## 
## The following object is masked from 'package:Matrix':
## 
##     as.matrix
## 
## The following objects are masked from 'package:stats':
## 
##     as.dist, dist
## 
## The following object is masked from 'package:base':
## 
##     as.matrix
## 
## 
## Attaching package: 'survival'
## 
## The following object is masked from 'package:caret':
## 
##     cluster
## 
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:raster':
## 
##     cv
## 
## The following object is masked from 'package:Hmisc':
## 
##     deff
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
## 
## 
## Attaching package: 'tidyr'
## 
## The following object is masked from 'package:reshape2':
## 
##     smiths
## 
## The following object is masked from 'package:raster':
## 
##     extract
## 
## The following object is masked from 'package:igraph':
## 
##     crossing
## 
## The following objects are masked from 'package:Matrix':
## 
##     expand, pack, unpack
## 
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ purrr     1.0.2
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%--%()       masks igraph::%--%()
## ✖ tibble::as_data_frame() masks igraph::as_data_frame(), dplyr::as_data_frame()
## ✖ dplyr::between()        masks data.table::between()
## ✖ scales::col_factor()    masks readr::col_factor()
## ✖ nlme::collapse()        masks dplyr::collapse()
## ✖ gridExtra::combine()    masks dplyr::combine()
## ✖ purrr::compose()        masks igraph::compose()
## ✖ tidyr::crossing()       masks igraph::crossing()
## ✖ purrr::discard()        masks scales::discard()
## ✖ tidyr::expand()         masks Matrix::expand()
## ✖ tidyr::extract()        masks raster::extract()
## ✖ dplyr::filter()         masks stats::filter()
## ✖ dplyr::first()          masks data.table::first()
## ✖ lubridate::hour()       masks data.table::hour()
## ✖ lubridate::isoweek()    masks data.table::isoweek()
## ✖ dplyr::lag()            masks stats::lag()
## ✖ dplyr::last()           masks data.table::last()
## ✖ purrr::lift()           masks caret::lift()
## ✖ lubridate::mday()       masks data.table::mday()
## ✖ lubridate::minute()     masks data.table::minute()
## ✖ lubridate::month()      masks data.table::month()
## ✖ tidyr::pack()           masks Matrix::pack()
## ✖ lubridate::quarter()    masks data.table::quarter()
## ✖ lubridate::second()     masks data.table::second()
## ✖ raster::select()        masks MASS::select(), dplyr::select()
## ✖ purrr::simplify()       masks igraph::simplify()
## ✖ Hmisc::src()            masks dplyr::src()
## ✖ Hmisc::summarize()      masks dplyr::summarize()
## ✖ purrr::transpose()      masks data.table::transpose()
## ✖ tidyr::unpack()         masks Matrix::unpack()
## ✖ lubridate::wday()       masks data.table::wday()
## ✖ lubridate::week()       masks data.table::week()
## ✖ lubridate::yday()       masks data.table::yday()
## ✖ lubridate::year()       masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'eeptools' was built under R version 4.4.2
## Warning: package 'sfsmisc' was built under R version 4.4.2
## 
## Attaching package: 'sfsmisc'
## 
## The following object is masked from 'package:spatstat.geom':
## 
##     xy.grid
## 
## The following object is masked from 'package:Hmisc':
## 
##     errbar
## 
## The following object is masked from 'package:dplyr':
## 
##     last
## 
## The following object is masked from 'package:data.table':
## 
##     last
loadfonts(device = 'win', quiet = T)
options(scipen = 999) # Disable scientific notation.


# 1.2| Initial values locations and folders ---------------------------------------------------
# 1: English.
# 0: Another.
language = 1

# Specify location of file
userLocation   = enc2native(here()) # Replace by your own path.

# Create output folders if not available to save the output in
if(!file.exists("Output")){
  dir.create("Output")  
}
if(!file.exists("Output/Chapter 05")){
  dir.create("Output/Chapter 05")  
}
if(!file.exists("Output/Profiling")){
  dir.create("Output/Profiling")  
}
if(!file.exists("Output/2")){
  dir.create("Output/Chapter 02")  
}
## Warning in dir.create("Output/Chapter 02"): 'Output\Chapter 02' already exists
if(!file.exists("Output/Chapter 03")){
  dir.create("Output/Chapter 03")  
}

# Location of Input and Code
scriptLocation = paste0(userLocation, '/Code/')
inputLocation  = paste0(userLocation, '/Input/')
#Call output location for this chapter
outputLocation = paste0(userLocation, '/Output/Chapter 05/')

# Paste end time
endTime = Sys.time()
endTime - startTime
## Time difference of 22.94405 secs
st = Sys.time()

outputLocation = paste0(userLocation, '/Output/Chapter 05/')

##1.3 Uploading main datasets: Individual, Household Data, and beneficiaries  ---------------------------------------------------

IndividualData=read_excel(paste0(inputLocation, 'Bases/DATA_BASES.xlsx'), sheet = 'Individuals',na = "NULL")
HH_Data=read_excel(paste0(inputLocation, 'Bases/DATA_BASES.xlsx'), sheet = 'Households',na = "NULL")

IndividualData = IndividualData %>% 
  mutate(`Schooling level`=if_else(`Schooling level`%in%c("illiterat"), "illiterate",`Schooling level`))

IndividualData = IndividualData %>%
  mutate(Age = trunc((as.Date(`Date of birth`) %--% as.Date(Sys.Date()) / years(1) )))


Beneficiaries_Data=read.csv(paste0(inputLocation, 'Bases/Beneficiaries.csv'), na = "NULL")


# 1.4 Uploading dictionary - Labels  ---------------------------------------------------
dataPlots <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), 
                        sheet = 'Labels')
dataPlots[is.na(dataPlots)] <- '' #Replacing NAs with characters



# 1.5| Uploading Dictionary -labels------------------------------------------------------
#Referring each graph to its specific labels in the dictionary

IndVarNames <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'A8:D33')) 
HHVarNames <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'F8:I51')) 
Benificiaries_VarNames<- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'P14:S23'))


GenderLabels <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'A1:D3')) 
MarriageLabel<- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'U14:X18')) 
YesNoLabels <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'F1:I3')) 
EducationAttainment <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'K1:N5')) 
HighestEducation <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'P1:S9')) 
EmploymentStatus <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'U1:X4')) 
EmploymentType <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'Z1:AC7'))
EmploymentRegularity <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'AE1:AH5'))
EmploymentSector <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'AJ1:AM4')) 

DwellingType <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'AO1:AR12')) #from to
DwellingOwnership <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'AT1:AW5')) #from to
DwellingWalls <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'AY1:BB7')) #from to
DamageLevel <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'BD1:BG4')) #from to
DwellingRoof <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'BI1:BL6')) #from to
DwellingFloor <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'BN1:BQ6')) #from to
CookingFuel <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'BS1:BV5')) #from to
ToiletType <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'BX1:CA6')) #from to

WaterSource <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'CC1:CF6')) #from to
ElectricitySource <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'CH1:CK4')) #from to
SewageWater <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'CM1:CP4')) #from to
HHProperties <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'CR1:CU3')) #from to
AgriculturalLand <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'CW1:CZ5')) #from to

LocationLabels <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'DB1:DE4')) #from to
GovernorateLabels <- as.matrix(read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Category', range = 'DG1:DJ9')) #from to


## 1.6 Uploading Dictionary - Weights
DwellingTypeWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'A2:C46') %>%  #from to
  rename(`House type`=Dwelling_type, `Contract type`= Dwelling_ownership, var_Dwelling_type=weight)
WallTypeWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'E2:G20')%>% #from to
  rename(`Wall material`=Wall_type, `Wall quality`= Damage_level, var_Wall_type=weight)
RoofTypeWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'I2:K17')%>% #from to
  rename(`Roof material`=Roof_type, `Roof quality`= Damage_level, var_Roof_type=weight)
FloorTypeWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'M2:O17')%>% #from to
  rename(`Floor material`=Floor_type, `Floor quality`= Damage_level, var_Floor_type=weight)
CookingfuelWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'Q2:R6')%>% #from to
  rename(`Cooking source`=Cooking_fuel, var_Cooking_fuel=weight)
ToiletTypeWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'T2:U7')%>% #from to
  rename(`Toilet type`=Toilet_type, var_Toilet_Type=weight)
WaterWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'W2:X7')%>% #from to
  rename(`Water`=Water_source, var_Drinking_water=weight)
DrinkingWaterWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'W9:X14')%>% #from to
  rename(`Drinking water`=Drinking_water_source, var_water=weight)
SewageWaterWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'Z9:AA12')%>% #from to
  rename(`Sewers`=Sewage_water, var_sewage=weight)
ElectricitySourceWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'Z2:AA5')%>% #from to
  rename(`Electricity source`=Electricity_source, var_electricity=weight)
LivestockWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'AC2:AE8')%>% #from to
  rename(`Livestock`=Livestock,`Location`= Location, var_livestock=weight)
ReaslestateWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'AG2:AI8')%>% #from to
  rename(`Realstate`=Realestate, `Location`= Location, var_Realestate=weight)
AgriLandWeights <- read_excel(paste0(inputLocation, 'Dictionary/Dictionary.xlsx'), sheet = 'Weights', range = 'AK2:AM14')%>% #from to
  rename(`Agriculture land`=Agricultural_land,`Location`= Location, var_AgriLand=weight)


# 1.7 Update names of the columns of main datasets

colnames(IndividualData)=as.character(factor(colnames(IndividualData),
                                          levels = IndVarNames[,2],
                                          labels = IndVarNames[,4-1]))
colnames(HH_Data)=as.character(factor(colnames(HH_Data),
                                      levels = HHVarNames[,2],
                                      labels = HHVarNames[,4-1]))

colnames(Beneficiaries_Data)=as.character(factor(colnames(Beneficiaries_Data),
                                                 levels = Benificiaries_VarNames[,2],
                                                 labels = Benificiaries_VarNames[,4-1]))

HH_Data=HH_Data[,!is.na(colnames(HH_Data))]

# 1.8 Convert categorical variables of individuals dataset into factors. 
# Also, in case another language wants to be used as a label for the data, this code will update to the language selected (in this case, language 1 is English, 0 is Arabic).
IndividualData$`Gender`=as.character(factor(IndividualData$`Gender`,
                                       levels = GenderLabels[,2],
                                       labels = GenderLabels[,4-language]))
IndividualData$`School attendance`=as.character(factor(IndividualData$`School attendance`,
                                                  levels = EducationAttainment[,2],
                                                  labels = EducationAttainment[,4-language]))
IndividualData$`Schooling level`=as.character(factor(IndividualData$`Schooling level`,
                                                    levels = HighestEducation[,2],
                                                    labels = HighestEducation[,4-language]))

IndividualData$`Marital status`=as.character(factor(IndividualData$`Marital status`,
                                                 levels = MarriageLabel[,2],
                                                 labels = MarriageLabel[,4-language]))

IndividualData$Age=as.numeric(IndividualData$Age)
IndividualData$`Chronic disease`=as.character(factor(IndividualData$`Chronic disease`,
                                         levels = YesNoLabels[,2],
                                         labels = YesNoLabels[,4-language]))
IndividualData$`Nutrition Disease`=as.character(factor(IndividualData$`Nutrition Disease`,
                                                  levels = YesNoLabels[,2],
                                                  labels = YesNoLabels[,4-language]))
IndividualData$`Common Disease`=as.character(factor(IndividualData$`Common Disease`,
                                                    levels = YesNoLabels[,2],
                                                    labels = YesNoLabels[,4-language]))
IndividualData$`Total disability`=as.character(factor(IndividualData$`Total disability`,
                                                    levels = YesNoLabels[,2],
                                                    labels = YesNoLabels[,4-language]))
IndividualData$`Partial disability`=as.character(factor(IndividualData$`Partial disability`,
                                                    levels = YesNoLabels[,2],
                                                    labels = YesNoLabels[,4-language]))
IndividualData$`Other disability`=as.character(factor(IndividualData$`Other disability`,
                                                    levels = YesNoLabels[,2],
                                                    labels = YesNoLabels[,4-language]))
IndividualData$`Polio`=as.character(factor(IndividualData$`Polio`,
                                                   levels = YesNoLabels[,2],
                                                   labels = YesNoLabels[,4-language]))
IndividualData$`Measels`=as.character(factor(IndividualData$`Measels`,
                                                   levels = YesNoLabels[,2],
                                                   labels = YesNoLabels[,4-language]))
IndividualData$`Diphtheria`=as.character(factor(IndividualData$`Diphtheria`,
                                                   levels = YesNoLabels[,2],
                                                   labels = YesNoLabels[,4-language]))
IndividualData$`Health insurance`=as.character(factor(IndividualData$`Health insurance`,
                                             levels = YesNoLabels[,2],
                                             labels = YesNoLabels[,4-language]))
IndividualData$`Employment Status`=as.character(factor(IndividualData$`Employment Status`,
                                                   levels = EmploymentStatus[,2],
                                                   labels = EmploymentStatus[,4-language]))
IndividualData$`Job Type`=as.character(factor(IndividualData$`Job Type`,
                                                    levels = EmploymentType[,2],
                                                    labels = EmploymentType[,4-language]))
IndividualData$`Contract type`=as.character(factor(IndividualData$`Contract type`,
                                           levels = EmploymentRegularity[,2],
                                           labels = EmploymentRegularity[,4-language]))
IndividualData$`Sector`=as.character(factor(IndividualData$`Sector`,
                                                levels = EmploymentSector[,2],
                                                labels = EmploymentSector[,4-language]))

#1.9 Creation of Relevant Variables from Individual to Household Datasets

t1=IndividualData%>%group_by(HHID)%>%
    mutate(hhSize=length(HHID),
           var_insurance=if_else(`Health insurance`%in%c("No"),1,0),
           var_chronic=if_else(`Chronic disease`%in%c("Yes"),1,0),
           var_nutrition=if_else(`Nutrition Disease`%in%c("Yes"),1,0),
           var_common=if_else(`Common Disease`%in%c("Yes"),1,0),
           var_tot_disab=if_else(`Total disability`%in%c("Yes"),1,0),
           var_part_disab=if_else(`Partial disability`%in%c("Yes"),1,0),
           var_other_disab=if_else(`Other disability`%in%c("Yes"),1,0),
           var_polio=if_else(`Polio`%in%c("Yes"),1,0),
           var_diphtheria=if_else(`Diphtheria`%in%c("Yes"),1,0),
           var_measels=if_else(`Measels` %in%c("Yes"), 1, 0),
           #Age=trunc((as.Date(`Date of birth`) %--% as.Date("2023-08-28")) / years(1)),
           school_attendance=if_else(Age>6 & `School attendance`%in%c("was enrolled and dropped out","haven't been enrolled before"),1,0),
           members_15above=if_else(Age>15 ,1,0),
           illiterate=if_else(Age>15 & `Schooling level`%in%c("illiterate"),1,0),
           high_education=if_else(Age>18 & `Schooling level`%in%c("post-secondary diploma","university and above","high-school"),1,0),
           kids13_below_enrolled=if_else(Age<13 & `School attendance`%in%c("currently enrolled"),1,0),
           kids13_below=if_else(Age<13 ,1,0),
           inactive=if_else(Age>15 & Age <25 & `Employment Status`%in%c("inactive") & `School attendance`%in%c("was enrolled and dropped out","haven't been enrolled before"),1,0),
           members15_25= if_else(Age>15 & Age <25,1,0),
           
           
           members18_65= if_else(Age>18 & Age <65,1,0),
           econ_dependency=if_else(`Job Type`%in%c("paid in cash")|`Job Type`%in%c("paid both cash and in-kind"),1,0),
           
           workers18_65=if_else((`Employment Status`%in%c("employed") & Age>18 & Age<65),1,0),
           workers=if_else((`Employment Status`%in%c("employed")),1,0),
           unemployment= if_else((`Employment Status`%in%c("unemployed") & Age>18 & Age<65),1,0),
           
           temporary=if_else(`Contract type`%in%c("temporary","seasonal"),1,0),
           child_labour=if_else(`Employment Status`%in%c("employed") & Age<18,1,0),
          members18_below=if_else(Age<18,1,0),
           female=if_else(`Gender`%in%c("Female"),1,0),
           old_dependency= if_else(Age>65,1,0),
           young_dependency=if_else(Age<18,1,0)
           )%>%
    summarise(hhSize=length(HHID),
              var_insurance=100*mean(var_insurance, rm.na=F),
              var_chronic=100*mean(var_chronic, rm.na=F),
              var_nutrition=100*mean(var_nutrition, rm.na=F),
              var_common=100*mean(var_common, rm.na=F),
              var_tot_disab=100*mean(var_tot_disab, rm.na=F),
              var_part_disab=100*mean(var_part_disab, rm.na=F),
              var_other_disab=100*mean(var_other_disab, rm.na=F),
              var_polio=100*mean(var_polio, rm.na=F),
              var_diphtheria=100*mean(var_diphtheria, rm.na=F),
              var_measels=100*mean(var_measels, rm.na=F),

              var_enrolment=100*mean(school_attendance, rm.na=F),
              var_illiteracy_rate=100*sum(illiterate)/(sum(members_15above)),
              var_high_education_rate=100 - 100*mean(high_education, rm.na=F),
              
              var_kids_outside_school=100-100*(sum(kids13_below_enrolled))/(sum(kids13_below)),
              
              var_NEET=100*sum(inactive)/sum(members15_25),
              
              
              var_econ_dependency=100-(100*mean(econ_dependency)),
              var_unemployment_rate= 100*sum(unemployment)/(sum(unemployment)+sum(workers18_65)),

              var_temporary_rate=100*sum(temporary)/(sum(workers)),
              var_child_labour=100*sum(child_labour)/(sum(members18_below)),
              var_female_ratio=100*mean(female),
              var_old_dependency_ratio=(100*sum(old_dependency)/sum(members18_65))/2,
              var_young_dependency_ratio=(100*sum(young_dependency)/sum(members18_65))/3) %>% 
  mutate(
    var_illiteracy_rate=if_else(is.nan(var_illiteracy_rate),100,var_illiteracy_rate),
    var_kids_outside_school=if_else(is.nan(var_kids_outside_school),0,var_kids_outside_school),
    var_NEET=if_else(is.nan(var_NEET),0,var_NEET),
    var_unemployment_rate=if_else(is.nan(var_unemployment_rate),100,var_unemployment_rate),
    var_temporary_rate=if_else(is.nan(var_temporary_rate),100,var_temporary_rate),
    var_child_labour=if_else(is.nan(var_child_labour),0,var_child_labour),
    var_female_ratio=if_else(is.nan(var_female_ratio),100,var_female_ratio),
    var_old_dependency_ratio=if_else(is.nan(var_old_dependency_ratio) | var_old_dependency_ratio>100,100,var_old_dependency_ratio),
    var_young_dependency_ratio=if_else(is.nan(var_young_dependency_ratio) | var_young_dependency_ratio>100,100,var_young_dependency_ratio),
    )
              

          
#### 1.10 Repeat 18. for Households dataset dataset 
HH_Data$`Household members`=as.numeric(HH_Data$`Household members`)
HH_Data$`Number of rooms`=as.numeric(HH_Data$`Number of rooms`)

HH_Data$`House type`=as.character(factor(HH_Data$`House type`,
                                         levels = DwellingType[,2],
                                         labels = DwellingType[,4-language]))
HH_Data$`Contract type`=as.character(factor(HH_Data$`Contract type`,
                                                 levels = DwellingOwnership[,2],
                                                 labels = DwellingOwnership[,4-language]))
HH_Data$`Wall material`=as.character(factor(HH_Data$`Wall material`,
                                             levels = DwellingWalls[,2],
                                             labels = DwellingWalls[,4-language]))
HH_Data$`Wall quality`=as.character(factor(HH_Data$`Wall quality`,
                                            levels = DamageLevel[,2],
                                            labels = DamageLevel[,4-language]))
HH_Data$`Roof material`=as.character(factor(HH_Data$`Roof material`,
                                            levels = DwellingRoof[,2],
                                            labels = DwellingRoof[,4-language]))
HH_Data$`Roof quality`=as.character(factor(HH_Data$`Roof quality`,
                                           levels = DamageLevel[,2],
                                           labels = DamageLevel[,4-language]))
HH_Data$`Floor material`=as.character(factor(HH_Data$`Floor material`,
                                            levels = DwellingFloor[,2],
                                            labels = DwellingFloor[,4-language]))
HH_Data$`Floor quality`=as.character(factor(HH_Data$`Floor quality`,
                                           levels = DamageLevel[,2],
                                           labels = DamageLevel[,4-language]))
HH_Data$`Cooking source`=as.character(factor(HH_Data$`Cooking source`,
                                           levels = CookingFuel[,2],
                                           labels = CookingFuel[,4-language]))
HH_Data$`Toilet type`=as.character(factor(HH_Data$`Toilet type`,
                                          levels = ToiletType[,2],
                                          labels = ToiletType[,4-language]))
HH_Data$`Drinking water`=as.character(factor(HH_Data$`Drinking water`,
                                           levels = WaterSource[,2],
                                           labels = WaterSource[,4-language]))
HH_Data$`Water`=as.character(factor(HH_Data$`Water`,
                                             levels = WaterSource[,2],
                                             labels = WaterSource[,4-language]))
HH_Data$`Electricity source`=as.character(factor(HH_Data$`Electricity source`,
                                    levels = ElectricitySource[,2],
                                    labels = ElectricitySource[,4-language]))

HH_Data$`Sewers`=as.character(factor(HH_Data$`Sewers`,
                                               levels = SewageWater[,2],
                                               labels = SewageWater[,4-language]))
HH_Data$`Time to the market`=as.numeric(HH_Data$`Time to the market`)
HH_Data$`Time to school`=as.numeric(HH_Data$`Time to school`)
HH_Data$`Time to hospital`=as.numeric(HH_Data$`Time to hospital`)


HH_Data$`Kitchen`=as.character(factor(HH_Data$`Kitchen`,
                                           levels = HHProperties[,2],
                                           labels = HHProperties[,4-language]))
HH_Data$`Internet`=as.character(factor(HH_Data$`Internet`,
                                      levels = HHProperties[,2],
                                      labels = HHProperties[,4-language]))
HH_Data$`Heater`=as.character(factor(HH_Data$`Heater`,
                                 levels = HHProperties[,2],
                                 labels = HHProperties[,4-language]))
HH_Data$`Sewing Machine`=as.character(factor(HH_Data$`Sewing Machine`,
                                           levels = HHProperties[,2],
                                           labels = HHProperties[,4-language]))
HH_Data$`Solar Panel`=as.character(factor(HH_Data$`Solar Panel`,
                                  levels = HHProperties[,2],
                                  labels = HHProperties[,4-language]))
HH_Data$`TV`=as.numeric(HH_Data$`TV`)
HH_Data$`Mobile phone`=as.numeric(HH_Data$`Mobile phone`)
HH_Data$`Car`=as.numeric(HH_Data$`Car`)
HH_Data$`Fridge`=as.numeric(HH_Data$`Fridge`)
HH_Data$`Washing machine`=as.numeric(HH_Data$`Washing machine`)
HH_Data$`AC`=as.numeric(HH_Data$`AC`)
HH_Data$`Computer`=as.numeric(HH_Data$`Computer`)
HH_Data$`Bike`=as.numeric(HH_Data$`Bike`)
HH_Data$`Automatic washing machine`=as.numeric(HH_Data$`Automatic washing machine`)

HH_Data$`Livestock`=as.character(factor(HH_Data$`Livestock`,
                                         levels = YesNoLabels[,2],
                                         labels = YesNoLabels[,4-language]))
HH_Data$`Location`=as.character(factor(HH_Data$`Location`,
                                        levels = LocationLabels[,2],
                                        labels = LocationLabels[,4-language]))
HH_Data$`Realstate`=as.character(factor(HH_Data$`Realstate`,
                                        levels = YesNoLabels[,2],
                                        labels = YesNoLabels[,4-language]))
HH_Data$`Agriculture land`=as.character(factor(HH_Data$`Agriculture land`,
                                        levels = AgriculturalLand[,2],
                                        labels = AgriculturalLand[,4-language]))
HH_Data$`Governorate`=as.character(factor(HH_Data$`Governorate`,
                                               levels = GovernorateLabels[,2],
                                               labels = GovernorateLabels[,4-language]))
HH_Data$`Location`=as.character(factor(HH_Data$`Location`,
                                               levels = LocationLabels[,2],
                                               labels = LocationLabels[,4-language]))





Indicators=HH_Data%>%
  left_join(DwellingTypeWeights) %>% 
  left_join(WallTypeWeights) %>% 
  left_join(FloorTypeWeights) %>% 
  left_join(RoofTypeWeights) %>% 
  left_join(CookingfuelWeights) %>% 
  left_join(ToiletTypeWeights) %>% 
  left_join(WaterWeights) %>% 
  left_join(DrinkingWaterWeights) %>% 
  left_join(ElectricitySourceWeights) %>% 
  left_join(SewageWaterWeights) %>% 
  left_join(LivestockWeights) %>% 
  left_join(ReaslestateWeights) %>% 
  left_join(AgriLandWeights) %>% 
  mutate(peoplePerRoom=`Household members`/`Number of rooms`,
           var_overcrowdingIssue=if_else(peoplePerRoom>3,1,0),
           var_duration_market=if_else(`Time to the market`>60,100,0),
           var_duration_school=if_else(`Time to school`>60,100,0),
           var_duration_hospital=if_else(`Time to hospital`>60,100,0),
           kitchen=if_else(`Kitchen`%in%c("Yes"),0,100),
           television=if_else(`TV`>0,0,90),
           phone=if_else(`Mobile phone`>0,0,90),
           carr=if_else(`Car`>0,0,80),
           fridge=if_else(`Fridge`>0,0,70),
           washing_machine=if_else(`Washing machine`>0,0,60),
           AC=if_else(`AC`>0,0,60),
           computer=if_else(`Computer`>0,0,50),
           internet=if_else(`Internet`%in%c("Yes"),0,50),
           heater=if_else(`Heater`%in%c("Yes"),0,50),
           sewing_machine=if_else(`Sewing Machine`%in%c("Yes"),0,40),
           solar=if_else(`Solar Panel`%in%c("Yes"),0,10),
           bike=if_else(`Bike`>0,0,10),
           auto_wash_machine=if_else(`Automatic washing machine`>0,0,10),
         var_Property_score= 100*(kitchen+television+phone+carr+fridge+washing_machine+AC+computer+internet+heater+sewing_machine+solar+bike+auto_wash_machine)/770,
         )
## Joining with `by = join_by(`House type`, `Contract type`)`
## Joining with `by = join_by(`Wall material`, `Wall quality`)`
## Joining with `by = join_by(`Floor material`, `Floor quality`)`
## Joining with `by = join_by(`Roof material`, `Roof quality`)`
## Joining with `by = join_by(`Cooking source`)`
## Joining with `by = join_by(`Toilet type`)`
## Joining with `by = join_by(Water)`
## Joining with `by = join_by(`Drinking water`)`
## Joining with `by = join_by(`Electricity source`)`
## Joining with `by = join_by(Sewers)`
## Joining with `by = join_by(Livestock, Location)`
## Joining with `by = join_by(Realstate, Location)`
## Joining with `by = join_by(`Agriculture land`, Location)`
#### COMBINE ####
completeData= Indicators %>% 
  left_join(t1) %>%
  dplyr::select(c("HHID"), starts_with("var_"))
## Joining with `by = join_by(HHID)`
completeData_Vuln= Indicators %>% 
  left_join(t1) %>%
  dplyr::select(c("HHID", "Governorate", "Location"), starts_with("var_"))
## Joining with `by = join_by(HHID)`
#selecting variables we need
PMTIndicators=completeData %>% 
  dplyr::select(c("HHID"), "var_overcrowdingIssue", "var_Wall_type","var_Floor_type", "var_Toilet_Type", "var_Drinking_water", "var_sewage",
                "var_Property_score", "var_AgriLand",
                "var_chronic","var_common", "var_measels", "var_tot_disab", "var_other_disab", "var_duration_hospital",
                "var_enrolment", "var_high_education_rate", "var_duration_school",
                "var_NEET", "var_unemployment_rate", "var_child_labour",
                "var_female_ratio", "var_young_dependency_ratio")


# 2.3 Calculate the score

SCORING = PMTIndicators %>% 
  mutate(
    s_var_overcrowdingIssue=(1/6)*(1/7)*var_overcrowdingIssue, # the value next to each variable is its weight. in this case we specified the weight but in other cases we have to stick to the weight given
    s_var_Wall=(1/6)*(1/7)*var_Wall_type,
    s_var_Floor=(1/6)*(1/7)*var_Floor_type,
    s_var_Toilet=(1/6)*(1/7)*var_Toilet_Type,
    s_var_DrinkWater=(1/6)*(1/7)*var_Drinking_water,
    s_var_Sewage=(1/6)*(1/7)*var_sewage,
    
    s_var_Property=(1/2)*(1/7)*var_Property_score,
    s_var_agriLand=(1/2)*(1/7)*var_AgriLand,
    
    s_var_chronic=(1/6)*(1/7)*var_chronic,
    s_var_common=(1/6)*(1/7)*var_common,
    s_var_measels=(1/6)*(1/7)*var_measels,
    s_var_totdisab=(1/6)*(1/7)*var_tot_disab,
    s_var_otherdisab=(1/6)*(1/7)*var_other_disab,
    s_var_durationHospital=(1/6)*(1/7)*var_duration_hospital,
    
    s_var_enrolment=(1/3)*(1/7)*var_enrolment,
    s_var_higheducation=(1/3)*(1/7)*var_high_education_rate,
    s_var_durationSchool=(1/3)*(1/7)*var_duration_school,
    
    s_var_NEET=(1/3)*(1/7)*var_NEET,
    s_var_unemp=(1/3)*(1/7)*var_unemployment_rate,
    s_var_childLabour=(1/3)*(1/7)*var_child_labour,
    
    s_var_female=(1/2)*(1/7)*var_female_ratio,
    s_var_youngdependency=(1/2)*(1/7)*var_young_dependency_ratio,
    
    score = s_var_overcrowdingIssue  + s_var_Wall + s_var_Floor +  s_var_Toilet + s_var_DrinkWater + s_var_Sewage +
      s_var_Property + s_var_agriLand + s_var_chronic + s_var_common + s_var_measels + s_var_totdisab + s_var_otherdisab +
      s_var_durationHospital + s_var_enrolment+ s_var_higheducation + s_var_durationSchool + s_var_NEET +  s_var_unemp +
      s_var_childLabour +s_var_female +  s_var_youngdependency )%>%
  drop_na()



#2.4 Add the new SCORING variable to the different datasets

#important note is to consider the head of the household not the whole family
filtered_Head = IndividualData%>%filter(`Relationship with HH`%in% c(1)) %>% 
  dplyr::select(c("HHID", "Gender", "Marital status", "Schooling level", "Employment Status"))

completeData_Vuln= completeData_Vuln %>% 
  left_join(SCORING) %>%
  dplyr::select(c("HHID", "Governorate", "Location","score"), starts_with("var_"))
## Joining with `by = join_by(HHID, var_Wall_type, var_Floor_type,
## var_Toilet_Type, var_Drinking_water, var_sewage, var_AgriLand,
## var_overcrowdingIssue, var_duration_school, var_duration_hospital,
## var_Property_score, var_chronic, var_common, var_tot_disab, var_other_disab,
## var_measels, var_enrolment, var_high_education_rate, var_NEET,
## var_unemployment_rate, var_child_labour, var_female_ratio,
## var_young_dependency_ratio)`
completeData_Vuln= completeData_Vuln %>% 
  left_join(filtered_Head) %>%
  dplyr::select(c("HHID", "Governorate", "Location","score", "Gender", "Marital status", "Schooling level", "Employment Status"), starts_with("var_"))
## Joining with `by = join_by(HHID)`
wb= openxlsx::createWorkbook(creator='ESCWA')
newSheet = addWorksheet(wb, sheetName = "Age vs. HH members")
MergedData= merge(IndividualData,HH_Data,by="HHID")