NDMA_DeIdentification_Kajiado_2000

NDMA Data Preprocessing

The following steps were undertaken to de-identify the NDMA data. The data is disaggregated by county for all 23 ASAL counties. It covers the years 2000 - 2020: data prior to 2016 was stored in a different database (REWAS), and data from 2016 onwards is stored in the new database (DEWS). Each county data set workbook contains 6 different sheets:

HHA REWAS, HHA DEWS, KIA REWAS, KIA DEWS, MUAC REWAS, MUAC DEWS

The process involves inspecting individual sheets for each data set, dropping P.I.I columns, and then writing all the sheets to a single workbook - Kajiado.

KAJIADO

1. HHA REWAS

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.3.2

library(geosphere)

## Warning: package 'geosphere' was built under R version 4.3.3

## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
##      (status 2 uses the sf package in place of rgdal)

library(openxlsx)

## Warning: package 'openxlsx' was built under R version 4.3.3

# Location of the source Kajiado workbook
file_path <- "C:/Users/AAH USER/Downloads/Kajiado.xlsx"

# Load the "HHA REWAS" sheet of that workbook as a data frame
hha_rewas_data <- read.xlsx(xlsxFile = file_path, sheet = "HHA REWAS")

This data set has PII in the “housename” column, so we will drop that column.

# Remove the PII column "housename" from the HHA REWAS data
hha_rewas_data <- select(hha_rewas_data, -housename)

# Preview the first rows of the de-identified data
head(hha_rewas_data)

##   district_name divisioncode year    month purchamt_bean soldamt_bean
## 1       KAJIADO          128 2006    March             0            0
## 2       KAJIADO          127 2006     July             6            0
## 3       KAJIADO          131 2006     July            16            0
## 4       KAJIADO          125 2006      May             4            0
## 5       KAJIADO          129 2006 February             1            0
## 6       KAJIADO          131 2006 December             0            0
##   purchamt_oil purchamt_cowpea soldamt_cowpea purchamt_greengram
## 1         1.00               0              0                  0
## 2         2.00               0              0                  0
## 3         4.00               0              0                  0
## 4         4.00               0              0                  0
## 5         0.25               0              0                  0
## 6         0.00               0              0                  0
##   soldamt_greengram purchamt_milk soldamt_milk purchamt_millet soldamt_millet
## 1                 0             0            0               0              0
## 2                 0             0            0               0              0
## 3                 0             0            0               0              0
## 4                 0             0            0               0              0
## 5                 0             0            0               0              0
## 6                 0             0            0               0              0
##   purchamt_other soldamt_other purchamt_pigeon soldamt_pigeon purchamt_posho
## 1              0             0               0              0              0
## 2              0             0               0              0             20
## 3              0             0               0              0              0
## 4              0             0               0              0             64
## 5              0             0               0              0              3
## 6              0             0               0              0              0
##   purchamt_rice purchamt_siftmaize purchamt_sorg soldamt_sorg purchamt_sugar
## 1             0                 24             0            0              1
## 2             8                  4             0            0              4
## 3             0                 16             0            0             12
## 4             4                  0             0            0              4
## 5             1                  0             0            0              0
## 6             0                  0             0            0              0
##   purchamt_wheat purchamt_wholemaize soldamt_wholemaize inc_dailyrate app_cam
## 1              0                   0                  0           100      NA
## 2              0                   0                  0           200      NA
## 3              0                   0                  0            NA      NA
## 4              4                   0                  0            NA      NA
## 5              0                   3                  0            NA      NA
## 6              0                   0                  0            NA      NA
##   born_cam death_cam deathreason_cam slaughtreason_cam slaught_cam sold_cam
## 1        0         0            <NA>                NA           0        0
## 2        0         0            <NA>                NA           0        0
## 3        0         0            <NA>                NA           0        0
## 4        0         0            <NA>                NA           0        0
## 5        0         0            <NA>                NA           0        0
## 6        0         0            <NA>                NA           0        0
##   total_cam app_cat born_cat death_cat deathreason_cat slaughtreason_cat
## 1         0      NA        0         0            <NA>              <NA>
## 2         0      NA        0         0            <NA>              <NA>
## 3         0      NA        0         0            <NA>              <NA>
## 4         0     800        0         0            <NA>              <NA>
## 5         0    3500        0         2         Drought              <NA>
## 6         0      NA        0         0            <NA>              <NA>
##   slaught_cat sold_cat total_cat app_don born_don death_don deathreason_don
## 1           0        0         2      NA        0         0            <NA>
## 2           0        0         0      NA        0         0            <NA>
## 3           0        0        50      NA        0         0            <NA>
## 4           0        1         5      NA        0         0            <NA>
## 5           0        1         4      NA        0         0            <NA>
## 6           0        0         2      NA        0         0            <NA>
##   sold_don total_don app_goa born_goa death_goa deathreason_goa
## 1        0         2      NA        0         0            <NA>
## 2        0         1      NA        0         0            <NA>
## 3        0         2      NA        0         0            <NA>
## 4        0         0    1200        0         0            <NA>
## 5        0         0     500        0         0            <NA>
## 6        0         6    1500        4         0            <NA>
##   slaughtreason_goa slaught_goa sold_goa total_goa hhaid survf_borrowfood
## 1              <NA>           0        0        15 10433           Rarely
## 2              <NA>           0        0         0 11828            Never
## 3              <NA>           0        0         0 12030            Never
## 4              <NA>           0        2        10 11186            Often
## 5              <NA>           0        1         7 10244            Often
## 6              <NA>           0        1        15 13694             <NA>
##   aid_cfw child_schooldrop aid_ffw aidkg_ffw surv_migrate aid_gift hvst_cereal
## 1      no               no      no         0           no       no          no
## 2      no               no      no         0           no       no          no
## 3      no               no      no         0           no       no          no
## 4      no               no      no         0           no       no          no
## 5      no               no      no         0           no       no          no
## 6      no               no      no         0           no       no          no
##   hvst_legume hh_totalmembers survf_lesspreffood survf_limitportion
## 1          no               6             Rarely             Rarely
## 2          no              11              Never              Never
## 3          no               5              Never              Never
## 4          no              13              Often              Often
## 5          no               9              Often              Often
## 6          no               6               <NA>               <NA>
##   survf_skipmeal survdesc_other surv_other hh_ownslivestock survf_foodcredit
## 1         Rarely             NA         no              yes           Rarely
## 2          Never             NA         no              yes           Rarely
## 3          Never             NA         no              yes           Rarely
## 4         Rarely             NA         no              yes            Often
## 5          Never             NA         no              yes            Often
## 6           <NA>             NA         no              yes             <NA>
##   purch_foodstuff inc_remittance survf_reducemeals aid_food aidkg_food
## 1             yes             no            Rarely      yes         20
## 2             yes             no             Never      yes         40
## 3             yes             no             Never       no          0
## 4             yes             no             Often      yes          4
## 5             yes             no             Often       no          0
## 6              no             no              <NA>       no          0
##   aid_remittance surv_sellbreedingstock surv_selldraught surv_selltools
## 1             no                     no               no             no
## 2             no                     no               no             no
## 3             no                     no               no             no
## 4             no                     no               no             no
## 5             no                     no               no             no
## 6             no                     no               no             no
##   surv_sellvaluables surv_sellmilkanimal sold_foodstuff stock_cereal
## 1                  2                  no             no           no
## 2                  2                  no             no           no
## 3                  2                  no             no           no
## 4                  2                  no             no           no
## 5                  2                  no             no           no
## 6                  2                  no             no           no
##   stock_legume aid_suppfood aidkg_suppfood stockexp_cereal stockexp_legume
## 1           no           no              0               0               0
## 2           no           no              0               0               0
## 3           no           no              0               0               0
## 4           no           no              0               0               0
## 5           no           no              0               0               0
## 6           no           no              0               0               0
##   inc_relynormalsource  item ldisease_cbpp ldisease_ccpp ldisease_diarrhea
## 1                   no  8614            no            no                NA
## 2                  yes 10009            no            no                NA
## 3                  yes 10211            no            no                NA
## 4                   no  9367            no            no                NA
## 5                  yes  8425            no            no                NA
## 6                   no 11875            no            no                NA
##   ldisease_other ldisease_ecfever ldisease_fmd ldisease_lumpy ldisease_ncastle
## 1           <NA>               no           no             no               no
## 2           <NA>               no           no             no               no
## 3           <NA>               no           no             no               no
## 4           <NA>               no           no             no               no
## 5           <NA>               no           no             no               no
## 6             no               no           no             no               no
##   vacc_anthrax vacc_cbpp vacc_ccpp vacc_diarrhea vacc_fmd vacc_lumpy
## 1           no        no        no            no       no          2
## 2           no        no        no            no       no          2
## 3           no        no        no            no       no          2
## 4           no        no        no            no       no          2
## 5           no        no        no            no       no          2
## 6           no        no        no            no       no          2
##   vacc_ncastle vacc_other vacc_worms inc_currentsource milksold_cat
## 1           no       <NA>         no      Casual Labor            0
## 2           no       <NA>         no Sale of Livestock            0
## 3           no       <NA>         no Employment/Salary            0
## 4           no       <NA>         no Sale of Livestock            0
## 5           no       <NA>         no Sale of Livestock            0
## 6           no         no         no Sale of Livestock            0
##   milksold_goa milksold_cam total_milkcam total_milkcat total_milkgoa
## 1            0            0             0             0             0
## 2            0            0             0             0             0
## 3            0            0             0             3             0
## 4            0            0             0             0             0
## 5            0            0             0             0             0
## 6            0            0             0             0             0
##   total_milkshe milkdaily_cam milkdaily_cat milkdaily_goa milkdaily_she
## 1             0             0             0             0             0
## 2             0             0             0             0             0
## 3             0             0             6             0             0
## 4             0             0             0             0             0
## 5             0             0             0             0             0
## 6             0             0             0             0             0
##   milksold_she inc_emplcasualwork inc_seekcasualwork inc_normalsource
## 1            0                  1                  1               NA
## 2            0                  1                  1               NA
## 3            0                  0                  0               NA
## 4            0                  0                  0               NA
## 5            0                  0                  0               NA
## 6            0                  0                  0               NA
##   pest_otherdesc inc_otherdesc pest app_pou death_pou deathreason_pou
## 1           NONE          NONE    0      NA         0            <NA>
## 2           NONE          NONE    0     200         0            <NA>
## 3           NONE          NONE    0      NA         0            <NA>
## 4           <NA>          <NA>    0      NA         0            <NA>
## 5           NONE          NONE    0      NA         0            <NA>
## 6           <NA>          <NA>    0      NA         0            <NA>
##   slaughtreason_pou slaught_pou sold_pou total_pou ppkpurch_bean ppksold_bean
## 1              <NA>           0        0        20            NA           NA
## 2              <NA>           0        2        14            50           NA
## 3              <NA>           0        0        11            50           NA
## 4              <NA>           0        0         0            50           NA
## 5              <NA>           0        0         1            25           NA
## 6              <NA>           0        0        20            NA           NA
##   ppkpurch_oil ppkpurch_cowpea ppksold_cowpea ppkpurch_greengram
## 1           30              NA             NA                 NA
## 2          130              NA             NA                 NA
## 3          120              NA             NA                 NA
## 4          120              NA             NA                 NA
## 5          120              NA             NA                 NA
## 6           NA              NA             NA                 NA
##   ppksold_greengram pplpurch_milk pplsold_milk ppkpurch_millet ppksold_millet
## 1                NA            NA           NA              NA             NA
## 2                NA            NA           NA              NA             NA
## 3                NA            NA           NA              NA             NA
## 4                NA            NA           NA              NA             NA
## 5                NA            NA           NA              NA             NA
## 6                NA            NA           NA              NA             NA
##   ppurch_other psold_other ppkpurch_pigeon ppksold_pigeon ppkpurch_posho
## 1           NA          NA              NA             NA             NA
## 2           NA          NA              NA             NA           23.0
## 3           NA          NA              NA             NA             NA
## 4           NA          NA              NA             NA           21.5
## 5           NA          NA              NA             NA           20.0
## 6           NA          NA              NA             NA             NA
##   ppkpurch_rice ppkpurch_siftmaize ppkpurch_sorg ppksold_sorg ppkpurch_sugar
## 1            NA               35.0            NA           NA             75
## 2            40               32.5            NA           NA             70
## 3            NA               60.0            NA           NA             70
## 4            40                 NA            NA           NA             60
## 5            40                 NA            NA           NA             NA
## 6            NA                 NA            NA           NA             NA
##   ppkpurch_wheat ppkpurch_wholemaize ppksold_wholemaize qtymilkproduced sacode
## 1             NA                  NA                 NA               0    199
## 2             NA                  NA                 NA               0    197
## 3             NA                  NA                 NA               0    190
## 4             40                  NA                 NA               0    195
## 5             NA                  20                 NA               0    193
## 6             NA                  NA                 NA               3    191
##   app_she born_she death_she deathreason_she slaughtreason_she slaught_she
## 1    1200        0         0            <NA>              <NA>           0
## 2    1300        1         0            <NA>              <NA>           0
## 3    1800        0         0            <NA>              <NA>           0
## 4      NA        0         0            <NA>              <NA>           0
## 5      NA        0         0            <NA>              <NA>           0
## 6    1200        3         0            <NA>              <NA>           0
##   sold_she total_she tag whodrankmilk       wrhh age1 age2 age3 age4 age5 age6
## 1        4         5   1       no one     Middle   NA   NA   NA   NA   NA   NA
## 2        1         2   1       no one       Poor   NA   NA   NA   NA   NA   NA
## 3       10        80   1    everybody Better Off   NA   NA   NA   NA   NA   NA
## 4        0         9   1       no one    Poorest   NA   NA   NA   NA   NA   NA
## 5        0         5   1       no one    Poorest   NA   NA   NA   NA   NA   NA
## 6        2        11   1         <NA>     Middle   NA   NA   NA   NA   NA   NA
##   soldamt_banana_old ppu_banana_old bled_cam bled_cat bled_goa bled_she
## 1                 NA             NA       NA       NA       NA       NA
## 2                 NA             NA       NA       NA       NA       NA
## 3                 NA             NA       NA       NA       NA       NA
## 4                 NA             NA       NA       NA       NA       NA
## 5                 NA             NA       NA       NA       NA       NA
## 6                 NA             NA       NA       NA       NA       NA
##   purch_cereals aid_cfw_old weather_old community csvname districtcode divname
## 1            NA          NA          NA        NA      NA           NA      NA
## 2            NA          NA          NA        NA      NA           NA      NA
## 3            NA          NA          NA        NA      NA           NA      NA
## 4            NA          NA          NA        NA      NA           NA      NA
## 5            NA          NA          NA        NA      NA           NA      NA
## 6            NA          NA          NA        NA      NA           NA      NA
##   fieldmonitorname aid_ffw_old aidkg_unimix child_unimix child_under5
## 1               NA          NA           NA           NA           NA
## 2               NA          NA           NA           NA           NA
## 3               NA          NA           NA           NA           NA
## 4               NA          NA           NA           NA           NA
## 5               NA          NA           NA           NA           NA
## 6               NA          NA           NA           NA           NA
##                 lhzone soldamt_maize_old ppu_maize_old soldamt_mango_old
## 1 Pastoral-All Species                NA            NA                NA
## 2         Agropastoral                NA            NA                NA
## 3 Pastoral-All Species                NA            NA                NA
## 4 Pastoral-All Species                NA            NA                NA
## 5 Pastoral-All Species                NA            NA                NA
## 6         Agropastoral                NA            NA                NA
##   ppu_mango_old milkyest_cam milkyest_cat milkyest_goa milkyest_she mon_date
## 1            NA           NA           NA           NA           NA       NA
## 2            NA           NA           NA           NA           NA       NA
## 3            NA           NA           NA           NA           NA       NA
## 4            NA           NA           NA           NA           NA       NA
## 5            NA           NA           NA           NA           NA       NA
## 6            NA           NA           NA           NA           NA       NA
##   mon_office muac1 muac2 muac3 muac4 muac5 muac6 name1 name2 name3 name4 name5
## 1         NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 2         NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 3         NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 4         NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 5         NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 6         NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
##   name6 aid_received aid_receivedother soldamt_other_old ppu_other_old pest_old
## 1    NA           NA                NA                NA            NA       NA
## 2    NA           NA                NA                NA            NA       NA
## 3    NA           NA                NA                NA            NA       NA
## 4    NA           NA                NA                NA            NA       NA
## 5    NA           NA                NA                NA            NA       NA
## 6    NA           NA                NA                NA            NA       NA
##   aid_food_old soldamt_rice_old ppu_rice_old saname soldamt_sorg_old
## 1           NA               NA           NA     NA               NA
## 2           NA               NA           NA     NA               NA
## 3           NA               NA           NA     NA               NA
## 4           NA               NA           NA     NA               NA
## 5           NA               NA           NA     NA               NA
## 6           NA               NA           NA     NA               NA
##   ppu_sorg_old
## 1           NA
## 2           NA
## 3           NA
## 4           NA
## 5           NA
## 6           NA

We save it to a new workbook and populate the rest of the sheets sequentially to the same Kajiado workbook after the relevant preprocessing steps have been taken.

# Destination of the de-identified Kajiado workbook
new_file_path <- "C:/Users/AAH USER/OneDrive - Action Against Hunger USA/Documents/NDMA_DeIdentified/Kajiado.xlsx"

# Start from an empty workbook object
wb <- createWorkbook()

# Create the "HHA REWAS" sheet and fill it with the de-identified data
addWorksheet(wb, sheetName = "HHA REWAS")
writeData(wb, sheet = "HHA REWAS", x = hha_rewas_data)

# Write the workbook to disk, replacing any existing file at that path
saveWorkbook(wb, file = new_file_path, overwrite = TRUE)

2. HHA DEWS

The geocoordinates in the HHA dataset represent household coordinates. We will mask them (random displacement) using the Haversine formula to randomly place each point around its original coordinate within a radius of 2.5 km, and drop the other PII.

library(openxlsx)

# Location of the source Kajiado workbook
file_path <- "C:/Users/AAH USER/Downloads/Kajiado.xlsx"

# Load the "HHA DEWS" sheet of that workbook as a data frame
hha_dews_data <- read.xlsx(xlsxFile = file_path, sheet = "HHA DEWS")

The dataset contains household coordinates in the “Lat” and “Long” columns, which are considered PII, so we mask the coordinates and then verify the masking by plotting a histogram of the displacement distances between the original and displaced coordinates to check for uniformity.

We check for and deal with outliers if any in the “Lat” and “Long” columns

# Inspect the distribution and NA count of the latitude column
summary(hha_dews_data[["Lat"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -3.0841 -1.5631  0.0000 -0.7134  0.0000  0.0000     905

# Inspect the distribution and NA count of the longitude column
summary(hha_dews_data[["Long"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00   13.61   36.64   37.74     905

There seem to be erroneous entries (coordinates recorded as 0). 905 rows also do not have entries for coordinates.

# Replace (0,0) coordinates with NA only in the Lat and Long columns.
# The rows are flagged once, BEFORE either column is modified: if the
# condition were re-evaluated after Lat had been set to NA, it would be NA
# for those rows and Long would silently keep its zeros.
# %in% never returns NA, so rows with missing coordinates are simply FALSE.
is_zero_coord <- hha_dews_data$Lat %in% 0 & hha_dews_data$Long %in% 0
hha_dews_data$Lat[is_zero_coord] <- NA
hha_dews_data$Long[is_zero_coord] <- NA

# Z-score cut-off beyond which a coordinate is treated as an outlier
threshold <- 3

# Centre and spread of each coordinate column, computed on non-missing values
lat_mean <- mean(hha_dews_data$Lat, na.rm = TRUE)
lat_sd <- sd(hha_dews_data$Lat, na.rm = TRUE)
lon_mean <- mean(hha_dews_data$Long, na.rm = TRUE)
lon_sd <- sd(hha_dews_data$Long, na.rm = TRUE)

# Standardise the coordinates, blank out values whose absolute Z-score
# exceeds the threshold, then discard the temporary Z-score columns
hha_dews_data <- hha_dews_data %>%
  mutate(
    lat_z = (Lat - lat_mean) / lat_sd,
    lon_z = (Long - lon_mean) / lon_sd,
    Lat = ifelse(!is.na(lat_z) & abs(lat_z) > threshold, NA, Lat),
    Long = ifelse(!is.na(lon_z) & abs(lon_z) > threshold, NA, Long),
    lat_z = NULL,
    lon_z = NULL
  )

# Re-inspect the latitude column after cleaning
summary(hha_dews_data[["Lat"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -3.084  -1.983  -1.895  -1.933  -1.530  -1.141    7230

# Re-inspect the longitude column after cleaning
summary(hha_dews_data[["Long"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00   13.61   36.64   37.74     905

In total, 6325 Lat values are affected by these checks and replaced with NAs, bringing the total to 7230 NAs in Lat; the Long summary is unchanged. We proceed to mask the coordinates.

# Keep a copy of the unmasked coordinates in Original_Lat / Original_Long
hha_dews_data <- mutate(
  hha_dews_data,
  Original_Lat = Lat,
  Original_Long = Long
)

# Randomly displace a single coordinate.
#
# A bearing is drawn uniformly from [0, 2*pi) and a distance uniformly from
# [0, radius_km]; the point reached from (lat, lon) along that bearing on a
# sphere of radius 6371 km is returned.
#
# Arguments:
#   lat, lon   Original latitude and longitude in decimal degrees.
#   radius_km  Maximum displacement distance in kilometers.
# Returns:
#   Numeric vector c(new_lat, new_lon) in decimal degrees.
mask_coordinates_uniform <- function(lat, lon, radius_km) {
  earth_radius_km <- 6371

  # Direction is drawn first, then the distance (expressed as an angle)
  theta <- runif(1, 0, 2 * pi)
  delta <- runif(1, 0, radius_km) / earth_radius_km

  # Starting point in radians
  phi_start <- lat * pi / 180
  lambda_start <- lon * pi / 180

  # Latitude of the displaced point (radians)
  phi_end <- asin(
    sin(phi_start) * cos(delta) +
      cos(phi_start) * sin(delta) * cos(theta)
  )

  # Longitude of the displaced point (radians)
  lambda_shift <- atan2(
    sin(theta) * sin(delta) * cos(phi_start),
    cos(delta) - sin(phi_start) * sin(phi_end)
  )
  lambda_end <- lambda_start + lambda_shift

  # Back to degrees, latitude first
  c(phi_end, lambda_end) * 180 / pi
}

# Set displacement radius in kilometers
radius_km <- 2.5

# Generate masked coordinates for each row, reading only the two coordinate
# columns. apply() over the whole data frame would first coerce every column
# to a character matrix, so the coordinates would round-trip through
# formatted strings; vapply() keeps them numeric and also guarantees a
# 2 x n numeric result (including for zero or one row).
masked_coords <- t(vapply(
  seq_len(nrow(hha_dews_data)),
  function(i) {
    mask_coordinates_uniform(
      as.numeric(hha_dews_data$Original_Lat[i]),
      as.numeric(hha_dews_data$Original_Long[i]),
      radius_km
    )
  },
  numeric(2)
))

# Replace the original Lat and Long columns with the masked coordinates
# (column 1 = masked latitude, column 2 = masked longitude)
hha_dews_data$Lat <- masked_coords[, 1]
hha_dews_data$Long <- masked_coords[, 2]

We then evaluate the distribution of the displacement distances before dropping the original coordinates by plotting a histogram.

# Distance between each masked point and its original location.
# The result is divided by 1000 to convert meters to kilometers.
displacement_distances <- with(
  hha_dews_data,
  distHaversine(cbind(Long, Lat), cbind(Original_Long, Original_Lat))
) / 1000

# Store the distances alongside the data so they can be plotted
hha_dews_data$Displacement_Distance <- displacement_distances

# Histogram of the displacement distances in 0.1 km bins
ggplot(data = hha_dews_data, mapping = aes(x = Displacement_Distance)) +
  geom_histogram(binwidth = 0.1, fill = "skyblue", color = "black") +
  ggtitle("Distribution of Displacement Distances") +
  xlab("Displacement Distance (km)") +
  ylab("Count") +
  theme_minimal()

## Warning: Removed 7230 rows containing non-finite values (`stat_bin()`).

There are no significant peaks or valleys in the histogram, suggesting that the displacement distances are uniformly distributed, as intended.

We then drop the original coordinate columns, leaving only the masked coordinate columns. We also drop the other PII columns, which are “HouseholdName”, “HouseHoldHead”, and “RespondentName”.

# Remove the unmasked coordinates, the helper distance column and the
# name columns, so only masked coordinates remain
hha_dews_data <- select(
  hha_dews_data,
  -Original_Lat,
  -Original_Long,
  -Displacement_Distance,
  -HouseholdName,
  -HouseHoldHead,
  -RespondentName
)

# Preview the first rows of the de-identified data
head(hha_dews_data)

##    QID  County     SubCounty        Ward LivelihoodZone Month Year Lat Long
## 1 2177 Kajiado Kajiado South       Rombo       Pastoral March 2016  NA   NA
## 2 2178 Kajiado Kajiado South       Rombo       Pastoral March 2016  NA   NA
## 3 2179 Kajiado Kajiado South       Rombo       Pastoral March 2016  NA   NA
## 4 2208 Kajiado Kajiado South       Rombo       Pastoral March 2016  NA   NA
## 5 2210 Kajiado Kajiado South       Rombo       Pastoral March 2016  NA   NA
## 6 2228 Kajiado  Kajiado West Loodokilani       Pastoral March 2016  NA   NA
##   InterviewDate HouseholdCode HeadEducationLevel MainHHIncomeSource HeadGender
## 1         42446          0001               <NA>               <NA>       Male
## 2         42446           002               <NA>               <NA>       Male
## 3         42446           003               <NA>               <NA>       Male
## 4         42447           003               <NA>               <NA>     Female
## 5         42447           004               <NA>               <NA>     Female
## 6         42448           231               <NA>               <NA>       Male
##   RespondentGender MaleMembers FemaleMembers ChildrenBelow5 KeepLivestock
## 1           Female           5             3              2          TRUE
## 2           Female           6            10              5         FALSE
## 3           Female           9             6              4         FALSE
## 4           Female           6             3              3         FALSE
## 5           Female           0             3              1         FALSE
## 6             Male           9             5              3         FALSE
##   MilkAnimals MilkSource HowOftenMilked AverageMilkedPerDay
## 1        TRUE       <NA>             NA                  NA
## 2       FALSE       <NA>             NA                  NA
## 3       FALSE       <NA>             NA                  NA
## 4       FALSE       <NA>             NA                  NA
## 5       FALSE       <NA>             NA                  NA
## 6          NA       <NA>             NA                  NA
##   AverageMilkConsumedPerDay              WhoDrankMilk AverageMilkPrice
## 1                        NA    Children under 5 years               NA
## 2                        NA    Children under 5 years               NA
## 3                        NA    Children under 5 years               NA
## 4                        NA    Children under 5 years               NA
## 5                        NA    Children under 5 years               NA
## 6                        NA Everyone in the household               NA
##   HarvestedInLastWeeks AcresHarvested BagsHarvested HaveFoodStock
## 1                 TRUE             NA            NA          TRUE
## 2                FALSE             NA            NA         FALSE
## 3                FALSE             NA            NA         FALSE
## 4                FALSE             NA            NA         FALSE
## 5                FALSE             NA            NA         FALSE
## 6                FALSE             NA            NA         FALSE
##   FoodStockSources DaysStockLast WaterSource1  WaterSource2
## 1       Production            21       Rivers      -Select-
## 2       Production             0       Rivers Pans and dams
## 3       Production             0       Rivers      -Select-
## 4       Production             0       Rivers      -Select-
## 5       Production             0       Rivers      -Select-
## 6             <NA>            NA    Boreholes Pans and dams
##              WaterSource3 NormalWaterSource   WhyNotNormalWaterSource
## 1                -Select-             FALSE Breakdown of water source
## 2 Traditional Water Wells             FALSE Breakdown of water source
## 3                -Select-             FALSE Breakdown of water source
## 4                -Select-             FALSE Breakdown of water source
## 5                -Select-             FALSE Breakdown of water source
## 6           Shallow wells              TRUE Breakdown of water source
##   DaysWaterSourceExpectedToLast DistanceFromWaterSource NoWaterJerryCans
## 1                             0                       0                0
## 2                            13                       6                5
## 3                             0                       0                0
## 4                             0                       0                0
## 5                             0                       0                0
## 6                            15                       5               10
##   JerryCansCost NormalHHWaterConsumption HHPayForWater CostTransportJerryCan
## 1             0                        0         FALSE                     0
## 2             6                        4         FALSE                     4
## 3             0                        0         FALSE                     0
## 4             0                        0         FALSE                     0
## 5             0                        0         FALSE                     0
## 6             0                       10         FALSE                    10
##   TreatWaterBeforeDrinking WaterTreatmentMethodUsed CSI_ReliedOnLess
## 1                    FALSE                     <NA>                0
## 2                    FALSE                     <NA>                2
## 3                    FALSE                     <NA>                0
## 4                    FALSE                     <NA>                0
## 5                    FALSE                     <NA>                0
## 6                     TRUE                  Boiling                2
##   CSI_BorrowedFood CSI_ReducedNoOfMeals CSI_ReducedPortionMealSize
## 1                0                    0                          0
## 2                1                    0                          1
## 3                0                    0                          0
## 4                0                    0                          0
## 5                0                    0                          0
## 6                2                    5                          2
##   CSI_QuantityForAdult CSI_SoldHouseholdAssets CSI_ReducedNonFoodExpenses
## 1                    0                       4                          4
## 2                    0                       4                          4
## 3                    0                       4                          4
## 4                    0                       4                          4
## 5                    0                       4                          4
## 6                    1                       4                          4
##   CSI_SoldProductiveAssets CSI_SpentSavings CSI_BorrowedMoney CSI_SoldHouseLand
## 1                        4                4                NA                 4
## 2                        4                4                NA                 4
## 3                        4                4                NA                 4
## 4                        4                4                NA                 4
## 5                        4                4                NA                 4
## 6                        4                4                 4                 4
##   CSI_WithdrewChildrenSchool CSI_SoldLastFemaleAnimal CSI_Begging
## 1                          4                        4           4
## 2                          4                        4           4
## 3                          4                        4           4
## 4                          4                        4           4
## 5                          4                        4           4
## 6                          4                        1           4
##   CSI_SoldMoreAnimals HFC_GrainDays HFC_GrainSource HFC_RootsDays
## 1                   4             7               1            NA
## 2                   4             7               5            NA
## 3                   4             5               5            NA
## 4                   4             6               5            NA
## 5                   4             6               1            NA
## 6                   4             7               5             5
##   HFC_RootsSource HFC_PulsesNutsDays HFC_PulsesNutsSource HFC_OrangeVegDays
## 1              NA                  3                    1                NA
## 2              NA                  0                   10                NA
## 3              NA                  0                   10                NA
## 4              NA                  0                   10                NA
## 5              NA                  1                    1                NA
## 6              NA                  2                    5                 5
##   HFC_OrangeVegSource HFC_GreenLeafyDays HFC_GreenLeafySource HFC_OtherVegDays
## 1                  NA                 NA                   NA                5
## 2                  NA                 NA                   NA                7
## 3                  NA                 NA                   NA                0
## 4                  NA                 NA                   NA                3
## 5                  NA                 NA                   NA                6
## 6                  NA                  2                   NA                2
##   HFC_OtherVegSource HFC_OrangeFruitsDays HFC_OrangeFruitsSource
## 1                  1                   NA                     NA
## 2                  5                   NA                     NA
## 3                 10                   NA                     NA
## 4                  5                   NA                     NA
## 5                  1                   NA                     NA
## 6                  5                    1                     NA
##   HFC_OtherFruitsDays HFC_OtherFruitsSource HFC_MeatDays HFC_MeatSource
## 1                   2                     5            2              1
## 2                   0                    10            0             10
## 3                   0                    10            0             10
## 4                   0                    10            0             10
## 5                   2                     9            1              5
## 6                   0                    10            0             10
##   HFC_LiverDays HFC_LiverSource HFC_FishDays HFC_EggsDays HFC_EggsSource
## 1            NA              NA           NA           NA             NA
## 2            NA              NA           NA           NA             NA
## 3            NA              NA           NA           NA             NA
## 4            NA              NA           NA           NA             NA
## 5            NA              NA           NA           NA             NA
## 6             0              NA            0            0             NA
##   HFC_MilkDays HFC_MilkSource HFC_OilDays HFC_OilSource HFC_SugarDays
## 1            6              1           7             5             7
## 2            2              1           5             5             5
## 3            2              5           2             5             0
## 4            1              5           2             5             4
## 5            2              1           5             5             7
## 6            2              1           3             5             7
##   HFC_SugarSource HFC_CondimentsDays HFC_CondimentsSource
## 1               5                  2                    5
## 2               5                  0                   10
## 3              10                  0                   10
## 4               5                  0                   10
## 5               5                  0                   10
## 6               5                  1                    5
##                MainIncomeSource MaleCasualLabour FemaleCasualLabour
## 1 3. Sale of livestock products                1                  1
## 2              1. Sale of crops                4                  0
## 3              1. Sale of crops                0                  0
## 4              1. Sale of crops                0                  0
## 5              1. Sale of crops                0                  0
## 6             Sale of livestock                2                  0
##   CasualLabourEarn CharcoalSaleEarn WoodSaleEarn DivisionID CountyID SiteID
## 1            10000                0            0        101       19    204
## 2            12000                0            0        101       19    204
## 3                0                0            0        101       19    204
## 4                0                0            0        101       19    204
## 5                0                0            0        101       19    204
## 6             2000                0            0         98       19    208
##   LivelihoodZoneID DateCaptured
## 1                1           NA
## 2                1           NA
## 3                1           NA
## 4                1           NA
## 5                1           NA
## 6                1           NA

We also have to ensure that the “InterviewDate” column is parsed correctly as a date before saving the worksheet to the new workbook.

# Coerce InterviewDate to numeric and turn the resulting day counts into
# Date values, counting from the 1899-12-30 origin
hha_dews_data$InterviewDate <- as.Date(
  as.numeric(hha_dews_data$InterviewDate),
  origin = "1899-12-30"
)

# Preview the converted dates to confirm they look sensible
head(hha_dews_data$InterviewDate)

## [1] "2016-03-17" "2016-03-17" "2016-03-17" "2016-03-18" "2016-03-18"
## [6] "2016-03-19"

Save the cleaned data set as a different sheet in the Kajiado workbook

# Path of the existing de-identified Kajiado workbook
existing_file_path <- "C:/Users/AAH USER/OneDrive - Action Against Hunger USA/Documents/NDMA_DeIdentified/Kajiado.xlsx"

# Load the existing workbook
wb <- loadWorkbook(existing_file_path)

# addWorksheet() stops if a sheet with this name already exists, so remove
# any copy left by an earlier run; this keeps the chunk re-runnable
if ("HHA DEWS" %in% names(wb)) {
  removeWorksheet(wb, "HHA DEWS")
}

# Add the cleaned HHA DEWS data to the existing workbook
addWorksheet(wb, "HHA DEWS")
writeData(wb, "HHA DEWS", hha_dews_data)

# Save the updated workbook over the existing file
saveWorkbook(wb, existing_file_path, overwrite = TRUE)

3. KIA REWAS

library(openxlsx)

# Source workbook holding all Kajiado sheets
file_path <- "C:/Users/AAH USER/Downloads/Kajiado.xlsx"

# Load only the "KIA REWAS" sheet as a data frame
kia_rewas_data <- read.xlsx(xlsxFile = file_path, sheet = "KIA REWAS")

There are no P.I.I columns in this particular sheet. We save this new sheet alongside the previous two in the Kajiado workbook created earlier.

# Path of the existing de-identified Kajiado workbook
existing_file_path <- "C:/Users/AAH USER/OneDrive - Action Against Hunger USA/Documents/NDMA_DeIdentified/Kajiado.xlsx"

# Load the existing workbook
wb <- loadWorkbook(existing_file_path)

# addWorksheet() stops if a sheet with this name already exists, so remove
# any copy left by an earlier run; this keeps the chunk re-runnable
if ("KIA REWAS" %in% names(wb)) {
  removeWorksheet(wb, "KIA REWAS")
}

# Add the KIA REWAS data to the existing workbook
addWorksheet(wb, "KIA REWAS")
writeData(wb, "KIA REWAS", kia_rewas_data)

# Save the updated workbook over the existing file
saveWorkbook(wb, existing_file_path, overwrite = TRUE)

4. KIA DEWS

library(openxlsx)

# Source workbook holding all Kajiado sheets
file_path <- "C:/Users/AAH USER/Downloads/Kajiado.xlsx"

# Load only the "KIA DEWS" sheet as a data frame
kia_dews_data <- read.xlsx(xlsxFile = file_path, sheet = "KIA DEWS")

There are no P.I.I columns in this particular sheet. We ensure that the “InterviewDate” column is parsed correctly as a date

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

# Ensure the column is numeric, as is done for the HHA DEWS and MUAC DEWS
# sheets, so the conversion does not depend on how read.xlsx() typed it
kia_dews_data$InterviewDate <- as.numeric(kia_dews_data$InterviewDate)

# Convert the numeric date to Date format
kia_dews_data$InterviewDate <- as.Date(kia_dews_data$InterviewDate, origin = "1899-12-30")

# View the first few dates to verify the conversion
head(kia_dews_data$InterviewDate)

## [1] "2016-07-07" "2016-07-07" "2016-07-07" "2016-07-09" "2016-07-08"
## [6] "2016-07-09"

We save this new sheet alongside the previous three in the Kajiado workbook created

# Path of the existing de-identified Kajiado workbook
existing_file_path <- "C:/Users/AAH USER/OneDrive - Action Against Hunger USA/Documents/NDMA_DeIdentified/Kajiado.xlsx"

# Load the existing workbook
wb <- loadWorkbook(existing_file_path)

# addWorksheet() stops if a sheet with this name already exists, so remove
# any copy left by an earlier run; this keeps the chunk re-runnable
if ("KIA DEWS" %in% names(wb)) {
  removeWorksheet(wb, "KIA DEWS")
}

# Add the KIA DEWS data to the existing workbook
addWorksheet(wb, "KIA DEWS")
writeData(wb, "KIA DEWS", kia_dews_data)

# Save the updated workbook over the existing file
saveWorkbook(wb, existing_file_path, overwrite = TRUE)

5. MUAC REWAS

library(openxlsx)

# Source workbook holding all Kajiado sheets
file_path <- "C:/Users/AAH USER/Downloads/Kajiado.xlsx"

# Load only the "MUAC REWAS" sheet as a data frame
muac_rewas_data <- read.xlsx(xlsxFile = file_path, sheet = "MUAC REWAS")

We drop the PII columns, which are “fname” and “hhname”.

# Remove the two name columns (fname, hhname) from the MUAC REWAS sheet
muac_rewas_data <- select(muac_rewas_data, -fname, -hhname)

# Preview the first rows of the reduced data set
head(muac_rewas_data)

##   district_name year     admin6id child_age batchid child_sickcode district
## 1       KAJIADO 2014 KE0104040302        24    1340              0        5
## 2       KAJIADO 2014 KE0104040302        29    1340              0        5
## 3       KAJIADO 2014 KE0104040302        59    1340              0        5
## 4       KAJIADO 2014 KE0104040302        30    1340              0        5
## 5       KAJIADO 2014 KE0104040302        19    1340              0        5
## 6       KAJIADO 2014 KE0104040302        40    1340              0        5
##   division child_sex child_sick  hhaid hhamuacid item child_hh lzonehh month
## 1      127      Male         No 522618    135287    1      Yes       1     1
## 2      127    Female         No 522617    135286    2      Yes       1     1
## 3      127      Male         No 522616    135285    3       No       1     1
## 4      127    Female         No 522617    135284    4       No       1     1
## 5      127    Female         No 522616    135283    5      Yes       1     1
## 6      127    Female         No 522616    135282    6      Yes       1     1
##   muac sacode serialno
## 1  152    197      102
## 2  168    197      101
## 3  144    197      100
## 4  154    197      101
## 5  150    197      100
## 6  154    197      100

We save this new sheet alongside the previous four in the Kajiado workbook created

# Path of the existing de-identified Kajiado workbook
existing_file_path <- "C:/Users/AAH USER/OneDrive - Action Against Hunger USA/Documents/NDMA_DeIdentified/Kajiado.xlsx"

# Load the existing workbook
wb <- loadWorkbook(existing_file_path)

# addWorksheet() stops if a sheet with this name already exists, so remove
# any copy left by an earlier run; this keeps the chunk re-runnable
if ("MUAC REWAS" %in% names(wb)) {
  removeWorksheet(wb, "MUAC REWAS")
}

# Add the MUAC REWAS data to the existing workbook
addWorksheet(wb, "MUAC REWAS")
writeData(wb, "MUAC REWAS", muac_rewas_data)

# Save the updated workbook over the existing file
saveWorkbook(wb, existing_file_path, overwrite = TRUE)

6. MUAC DEWS

library(openxlsx)

# Source workbook holding all Kajiado sheets
file_path <- "C:/Users/AAH USER/Downloads/Kajiado.xlsx"

# Load only the "MUAC DEWS" sheet as a data frame
muac_dews_data <- read.xlsx(xlsxFile = file_path, sheet = "MUAC DEWS")

This data set has P.I.I’s in the “ChildName” column so we will drop that.

# Remove the ChildName column, the only PII column dropped from this sheet
muac_dews_data <- select(muac_dews_data, -ChildName)

# Preview the first rows of the reduced data set
head(muac_dews_data)

##   MUACIndicatorID  QID  County     SubCounty        Ward LivelihoodZone Month
## 1            2636 2228 Kajiado  Kajiado West Loodokilani       Pastoral March
## 2            2661 2228 Kajiado  Kajiado West Loodokilani       Pastoral March
## 3            2663 2179 Kajiado Kajiado South       Rombo       Pastoral March
## 4            2673 2178 Kajiado Kajiado South       Rombo       Pastoral March
## 5            2674 2178 Kajiado Kajiado South       Rombo       Pastoral March
## 6           13387 7960 Kajiado  Kajiado West Loodokilani   Pastoral All  July
##   Year HouseholdCode     Gender MUAC MUAC_Color AgeInMonths LiveInHousehold
## 1 2016           231 Female      140         25        TRUE        Diarrhea
## 2 2016           231 Male        150         24        TRUE            <NA>
## 3 2016           003 Male        150         36        TRUE            <NA>
## 4 2016           002 Female      157         46        TRUE            <NA>
## 5 2016           002 Male        145         20       FALSE            <NA>
## 6 2016           001 Female      172         50        TRUE            <NA>
##   SufferedIllnesses InterviewDate DivisionID CountyID SiteID LivelihoodZoneID
## 1             42448            98         19      208      1               NA
## 2             42448            98         19      208      1               NA
## 3             42446           101         19      204      1               NA
## 4             42446           101         19      204      1               NA
## 5             42446           101         19      204      1               NA
## 6             42559            98         19      208      5               NA

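We also have to ensure that the “InterviewDate” column is parsed correctly as a date. For the 2016–2019 records, the values from “MUAC_Color” onwards sit one column too far to the left, so the interview dates appear under “SufferedIllnesses”. We shift these values one column to the right to realign them and then parse the date.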

# Row indices of the records dated 2016-2019, the ones that get realigned
rows_to_modify <- which(muac_dews_data$Year %in% 2016:2019)

# Columns taking part in the shift, listed left to right
shift_columns <- c(
  "MUAC_Color", "AgeInMonths", "LiveInHousehold",
  "SufferedIllnesses", "InterviewDate",
  "DivisionID", "CountyID", "SiteID", "LivelihoodZoneID"
)

# Keep a copy of the affected cells before they are blanked
shifted_values <- muac_dews_data[rows_to_modify, shift_columns]

# Blank the affected cells; the first shift column stays NA afterwards
muac_dews_data[rows_to_modify, shift_columns] <- NA

# Write each saved column into its right-hand neighbour. The saved values of
# the last column have no neighbour and are not written back.
source_columns <- head(shift_columns, -1)
target_columns <- tail(shift_columns, -1)
for (k in seq_along(source_columns)) {
  muac_dews_data[rows_to_modify, target_columns[k]] <-
    shifted_values[[source_columns[k]]]
}

library(lubridate)

# Coerce InterviewDate to numeric and convert the Excel serial day counts to
# R Date values, counting from the 1899-12-30 origin
muac_dews_data$InterviewDate <- as.Date(
  as.numeric(muac_dews_data$InterviewDate),
  origin = "1899-12-30"
)

# Preview the converted dates to confirm they look sensible
head(muac_dews_data$InterviewDate)

## [1] "2016-03-19" "2016-03-19" "2016-03-17" "2016-03-17" "2016-03-17"
## [6] "2016-07-08"

Save this final sheet to the existing workbook

# Path of the existing de-identified Kajiado workbook
existing_file_path <- "C:/Users/AAH USER/OneDrive - Action Against Hunger USA/Documents/NDMA_DeIdentified/Kajiado.xlsx"

# Load the existing workbook
wb <- loadWorkbook(existing_file_path)

# addWorksheet() stops if a sheet with this name already exists, so remove
# any copy left by an earlier run; this keeps the chunk re-runnable
if ("MUAC DEWS" %in% names(wb)) {
  removeWorksheet(wb, "MUAC DEWS")
}

# Add the MUAC DEWS data to the existing workbook
addWorksheet(wb, "MUAC DEWS")
writeData(wb, "MUAC DEWS", muac_dews_data)

# Save the updated workbook over the existing file
saveWorkbook(wb, existing_file_path, overwrite = TRUE)

NDMA_DeIdentification_Kajiado_2000_2020

Pheroze

2024-10-18