The following are steps undertaken for deidentifying NDMA data. The data is dis aggregated per county for all 23 counties - ASAL. The information covers the years of 2000 - 2020, where data prior to 2016 was stored in a different database (REWAS) and data from 2016 henceforth in the new database (DEWS). In each county data set workbook there are 6 different sheets:
HHA REWAS, HHA DEWS, KIA REWAS, KIA DEWS, MUAC REWAS, MUAC DEWS
The process involves inspecting individual sheets for each data set, dropping P.I.I columns, and then writing all the sheets to a single workbook - Baringo.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
library(geosphere)
## Warning: package 'geosphere' was built under R version 4.3.3
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
## (status 2 uses the sf package in place of rgdal)
library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.3.3
file_path <- "C:/Users/AAH USER/Downloads/Baringo.xlsx"
# Read the specific sheet into a data frame
hha_rewas_data <- read.xlsx(file_path, sheet = "HHA-REWAS")
There are no P.I.I columns in this particular sheet so we save it to a new workbook and populate the rest of the sheets sequentially to the same Baringo workbook after relevant pre processing steps have been taken.
# Define the path for the new Excel workbook
new_file_path <- "C:/Users/AAH USER/OneDrive - Action Against Hunger USA/Documents/NDMA_DeIdentified/Baringo.xlsx"
# Create a new workbook
wb <- createWorkbook()
# Add the HHA-REWAS data to the new Baringo workbook
addWorksheet(wb, "HHA-REWAS")
writeData(wb, "HHA-REWAS", hha_rewas_data)
# Save the new workbook
saveWorkbook(wb, new_file_path, overwrite = TRUE)
The geocoordinates in the HHA dataset represent household coordinates, we will mask them (random displacement) using the Haversine Formula to randomly distribute a point around a central coordinate within a radius of 2.5 KM and drop other P.I.I.s.
library(openxlsx)
file_path <- "C:/Users/AAH USER/Downloads/Baringo.xlsx"
# Read the specific sheet into a data frame
hha_dews_data <- read.xlsx(file_path, sheet = "HHA DEWS")
The dataset contains household coordinates in columns “Lat” and “Long” which are considered P.I.I’s so we mask the coordinates, verify by plotting a histogram of the distribution of displacement distances of the original and displaced coordinates to establish uniformity.
We check for and deal with outliers if any in the “Lat” and “Long” columns
# Verify the dataset
summary(hha_dews_data$Lat)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -1.325 0.000 0.503 0.563 1.045 1.404 5122
summary(hha_dews_data$Long)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 35.79 23.19 36.05 36.89 5122
There seems to be erroneous entries. 5122 rows also dont have entries for coordinates.
# Replace (0,0) coordinates with NA only in the Lat and Long columns
hha_dews_data$Lat[hha_dews_data$Lat == 0 & hha_dews_data$Long == 0] <- NA
hha_dews_data$Long[hha_dews_data$Lat == 0 & hha_dews_data$Long == 0] <- NA
# Calculate the mean for Lat and Long, ignoring NA values
lat_mean <- mean(hha_dews_data$Lat, na.rm = TRUE)
lon_mean <- mean(hha_dews_data$Long, na.rm = TRUE)
lat_sd <- sd(hha_dews_data$Lat, na.rm = TRUE)
lon_sd <- sd(hha_dews_data$Long, na.rm = TRUE)
# Calculate Z-scores
hha_dews_data <- hha_dews_data %>%
mutate(lat_z = (Lat - lat_mean) / lat_sd,
lon_z = (Long - lon_mean) / lon_sd)
# Set threshold for identifying outliers
threshold <- 3 # Common threshold for Z-scores
# Replace outliers with NA
hha_dews_data <- hha_dews_data %>%
mutate(Lat = ifelse(abs(lat_z) > threshold & !is.na(lat_z), NA, Lat),
Long = ifelse(abs(lon_z) > threshold & !is.na(lon_z), NA, Long))
# Remove the Z-score columns
hha_dews_data <- hha_dews_data %>%
select(-lat_z, -lon_z)
# Verify the dataset
summary(hha_dews_data$Lat)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.388 0.519 0.853 0.875 1.141 1.404 8387
summary(hha_dews_data$Long)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 35.79 23.19 36.05 36.89 5122
3265 rows in total are affected by the outlier and are replaced with NAs, bringing the total to 8387 NAs. We proceed to mask the coordinates.
# Create backup columns for original coordinates
hha_dews_data$Original_Lat <- hha_dews_data$Lat
hha_dews_data$Original_Long <- hha_dews_data$Long
# Function to generate random displaced coordinates with uniform distance distribution
mask_coordinates_uniform <- function(lat, lon, radius_km) {
R <- 6371 # Earth radius in kilometers
# Random bearing angle (in radians)
bearing <- runif(1, 0, 2 * pi)
# Random distance uniformly sampled from [0, radius_km]
rand_dist <- runif(1, 0, radius_km) / R # Uniformly sampled distance in radians
# Convert original coordinates to radians
lat_rad <- lat * pi / 180
lon_rad <- lon * pi / 180
# Calculate new latitude (in radians)
new_lat <- asin(sin(lat_rad) * cos(rand_dist) +
cos(lat_rad) * sin(rand_dist) * cos(bearing))
# Calculate new longitude (in radians)
new_lon <- lon_rad + atan2(sin(bearing) * sin(rand_dist) * cos(lat_rad),
cos(rand_dist) - sin(lat_rad) * sin(new_lat))
# Convert back to degrees
new_lat <- new_lat * 180 / pi
new_lon <- new_lon * 180 / pi
return(c(new_lat, new_lon))
}
# Set displacement radius in kilometers
radius_km <- 2.5
# Generate masked coordinates for each row using the modified function
masked_coords <- t(apply(hha_dews_data, 1, function(row) {
mask_coordinates_uniform(as.numeric(row["Original_Lat"]), as.numeric(row["Original_Long"]), radius_km)
}))
# Replace the original Lat and Long columns with the masked coordinates
hha_dews_data$Lat <- masked_coords[, 1]
hha_dews_data$Long <- masked_coords[, 2]
We then evaluate the distribution of the displacement distances before dropping the original coordinates by plotting a histogram.
# Calculate displacement distances (in kilometers) as before
displacement_distances <- distHaversine(
cbind(hha_dews_data$Long, hha_dews_data$Lat), # Masked coordinates
cbind(hha_dews_data$Original_Long, hha_dews_data$Original_Lat) # Original coordinates
) / 1000 # Convert meters to kilometers
# Add displacement distances to the dataset for further analysis
hha_dews_data$Displacement_Distance <- displacement_distances
# Plot: Histogram of displacement distances
ggplot(hha_dews_data, aes(x = Displacement_Distance)) +
geom_histogram(binwidth = 0.1, fill = "skyblue", color = "black") +
labs(title = "Distribution of Displacement Distances",
x = "Displacement Distance (km)", y = "Count") +
theme_minimal()
## Warning: Removed 8387 rows containing non-finite values (`stat_bin()`).
There are no significant peaks or valleys in the histogram, suggesting that the displacements are indeed more uniformly distributed, as intended.
We then drop the original coordinates column leaving only the masked coordinates columns. We also drop other PII’s which are the “HouseholdName”, “HouseHoldHead”, and “RespondentName”.
# Drop the specified PII columns along with original coordinates
hha_dews_data <- hha_dews_data %>%
select(-c(Original_Lat, Original_Long, Displacement_Distance, HouseholdName, HouseHoldHead, RespondentName))
# Check the updated dataset
head(hha_dews_data)
## QID County SubCounty Ward LivelihoodZone Month Year Lat Long
## 1 8659 Baringo Baringo South Ilchamus Irrigated Cropping July 2016 NA NA
## 2 8660 Baringo Baringo South Ilchamus Irrigated Cropping July 2016 NA NA
## 3 8661 Baringo Baringo South Ilchamus Irrigated Cropping July 2016 NA NA
## 4 8662 Baringo Baringo South Ilchamus Irrigated Cropping July 2016 NA NA
## 5 8663 Baringo Baringo South Ilchamus Irrigated Cropping July 2016 NA NA
## 6 8664 Baringo Baringo South Ilchamus Irrigated Cropping July 2016 NA NA
## InterviewDate HouseholdCode HeadEducationLevel MainHHIncomeSource HeadGender
## 1 42553 1 <NA> <NA> Male
## 2 42553 2 <NA> <NA> Male
## 3 42553 3 <NA> <NA> Male
## 4 42554 4 <NA> <NA> Female
## 5 42554 5 <NA> <NA> Male
## 6 42554 6 <NA> <NA> Male
## RespondentGender MaleMembers FemaleMembers ChildrenBelow5 KeepLivestock
## 1 Male 3 3 5 TRUE
## 2 Female 4 5 3 TRUE
## 3 Female 2 5 4 FALSE
## 4 Female 4 2 3 TRUE
## 5 Female 4 7 4 TRUE
## 6 Male 2 2 3 TRUE
## MilkAnimals MilkSource HowOftenMilked AverageMilkedPerDay
## 1 FALSE <NA> NA NA
## 2 FALSE <NA> NA NA
## 3 FALSE <NA> NA NA
## 4 FALSE <NA> NA NA
## 5 FALSE <NA> NA NA
## 6 FALSE <NA> NA NA
## AverageMilkConsumedPerDay WhoDrankMilk AverageMilkPrice
## 1 NA Children under 5 years NA
## 2 NA Children under 5 years NA
## 3 NA Children under 5 years NA
## 4 NA Children under 5 years NA
## 5 NA Children under 5 years NA
## 6 NA Children under 5 years NA
## HarvestedInLastWeeks AcresHarvested BagsHarvested HaveFoodStock
## 1 FALSE NA NA FALSE
## 2 FALSE NA NA FALSE
## 3 FALSE NA NA FALSE
## 4 FALSE NA NA TRUE
## 5 TRUE NA NA TRUE
## 6 FALSE NA NA TRUE
## FoodStockSources DaysStockLast WaterSource1 WaterSource2
## 1 Production 0 Rivers Traditional Water Wells
## 2 Production 0 Rivers Traditional Water Wells
## 3 Production 0 Rivers Traditional Water Wells
## 4 Purchase 7 Rivers Traditional Water Wells
## 5 Purchase 20 Rivers Traditional Water Wells
## 6 Purchase 12 Rivers Traditional Water Wells
## WaterSource3 NormalWaterSource WhyNotNormalWaterSource
## 1 Pans and dams TRUE Breakdown of water source
## 2 Pans and dams TRUE Breakdown of water source
## 3 Pans and dams FALSE Breakdown of water source
## 4 Canals TRUE Breakdown of water source
## 5 CANALS TRUE Breakdown of water source
## 6 Pans and dams TRUE Breakdown of water source
## DaysWaterSourceExpectedToLast DistanceFromWaterSource NoWaterJerryCans
## 1 180 1 4
## 2 180 1 6
## 3 180 1 6
## 4 60 2 5
## 5 180 2 8
## 6 180 1 4
## JerryCansCost NormalHHWaterConsumption HHPayForWater CostTransportJerryCan
## 1 0 0 FALSE 0
## 2 0 0 FALSE 0
## 3 0 0 FALSE 0
## 4 0 0 FALSE 0
## 5 0 0 FALSE 0
## 6 0 0 FALSE 0
## TreatWaterBeforeDrinking WaterTreatmentMethodUsed CSI_ReliedOnLess
## 1 FALSE <NA> 0
## 2 FALSE <NA> 1
## 3 FALSE <NA> 1
## 4 TRUE Filtration 0
## 5 TRUE Boiling 0
## 6 TRUE Boiling 0
## CSI_BorrowedFood CSI_ReducedNoOfMeals CSI_ReducedPortionMealSize
## 1 2 4 3
## 2 2 0 4
## 3 3 5 2
## 4 2 4 2
## 5 0 4 2
## 6 0 4 2
## CSI_QuantityForAdult CSI_SoldHouseholdAssets CSI_ReducedNonFoodExpenses
## 1 0 4 2
## 2 0 1 1
## 3 0 4 1
## 4 0 4 1
## 5 0 4 1
## 6 0 4 1
## CSI_SoldProductiveAssets CSI_SpentSavings CSI_BorrowedMoney CSI_SoldHouseLand
## 1 4 1 NA 1
## 2 4 1 NA 4
## 3 4 3 NA 1
## 4 4 1 NA 1
## 5 4 1 NA 1
## 6 4 3 NA 1
## CSI_WithdrewChildrenSchool CSI_SoldLastFemaleAnimal CSI_Begging
## 1 1 1 1
## 2 1 1 1
## 3 1 1 1
## 4 1 1 1
## 5 1 1 1
## 6 1 1 1
## CSI_SoldMoreAnimals HFC_GrainDays HFC_GrainSource HFC_RootsDays
## 1 1 7 5 NA
## 2 1 7 5 NA
## 3 1 7 5 NA
## 4 1 7 5 NA
## 5 1 7 5 NA
## 6 1 7 5 NA
## HFC_RootsSource HFC_PulsesNutsDays HFC_PulsesNutsSource HFC_OrangeVegDays
## 1 NA 2 5 NA
## 2 NA 2 5 NA
## 3 NA 4 5 NA
## 4 NA 2 5 NA
## 5 NA 2 5 NA
## 6 NA 2 5 NA
## HFC_OrangeVegSource HFC_GreenLeafyDays HFC_GreenLeafySource HFC_OtherVegDays
## 1 NA NA NA 7
## 2 NA NA NA 7
## 3 NA NA NA 7
## 4 NA NA NA 7
## 5 NA NA NA 7
## 6 NA NA NA 7
## HFC_OtherVegSource HFC_OrangeFruitsDays HFC_OrangeFruitsSource
## 1 5 NA NA
## 2 5 NA NA
## 3 5 NA NA
## 4 5 NA NA
## 5 5 NA NA
## 6 5 NA NA
## HFC_OtherFruitsDays HFC_OtherFruitsSource HFC_MeatDays HFC_MeatSource
## 1 0 10 1 3
## 2 3 5 1 1
## 3 0 10 1 5
## 4 3 5 1 5
## 5 1 5 2 1
## 6 1 5 1 5
## HFC_LiverDays HFC_LiverSource HFC_FishDays HFC_EggsDays HFC_EggsSource
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## HFC_MilkDays HFC_MilkSource HFC_OilDays HFC_OilSource HFC_SugarDays
## 1 3 5 7 5 7
## 2 7 5 7 5 7
## 3 4 5 7 5 7
## 4 3 5 7 5 7
## 5 7 5 7 5 7
## 6 7 5 7 5 7
## HFC_SugarSource HFC_CondimentsDays HFC_CondimentsSource MainIncomeSource
## 1 5 7 5 6. Sale of charcoal
## 2 5 7 5 9. Others
## 3 5 7 5 6. Sale of charcoal
## 4 5 7 5 6. Sale of charcoal
## 5 5 7 5 4. Casual labour
## 6 5 7 5 4. Casual labour
## MaleCasualLabour FemaleCasualLabour CasualLabourEarn CharcoalSaleEarn
## 1 1 1 0 1350
## 2 0 1 0 0
## 3 1 0 0 900
## 4 0 1 0 1800
## 5 1 1 2500 0
## 6 1 0 2200 0
## WoodSaleEarn DivisionID CountyID SiteID LivelihoodZoneID DateCaptured
## 1 0 1142 9 1497 4 NA
## 2 NA 1142 9 1497 4 NA
## 3 0 1142 9 1497 4 NA
## 4 0 1142 9 1497 4 NA
## 5 0 1142 9 1497 4 NA
## 6 0 1142 9 1497 4 NA
We also have to ensure that the “InterviewDate” column is parsed correctly as a date before saving the worksheet to the new workbook.
# Ensure the column is numeric
hha_dews_data$InterviewDate <- as.numeric(hha_dews_data$InterviewDate)
# Convert the numeric date to Date format
hha_dews_data$InterviewDate <- as.Date(hha_dews_data$InterviewDate, origin = "1899-12-30")
# View the first few dates to verify the conversion
head(hha_dews_data$InterviewDate)
## [1] "2016-07-02" "2016-07-02" "2016-07-02" "2016-07-03" "2016-07-03"
## [6] "2016-07-03"
Save the cleaned data set as a different sheet in the Baringo workbook
# Define the path for the existing Excel workbook
existing_file_path <- "C:/Users/AAH USER/OneDrive - Action Against Hunger USA/Documents/NDMA_DeIdentified/Baringo.xlsx"
# Load the existing workbook
wb <- loadWorkbook(existing_file_path)
# Add the cleaned HHA DEWS data to the existing workbook
addWorksheet(wb, "HHA DEWS")
writeData(wb, "HHA DEWS", hha_dews_data)
# Save the updated workbook
saveWorkbook(wb, existing_file_path, overwrite = TRUE)
library(openxlsx)
file_path <- "C:/Users/AAH USER/Downloads/Baringo.xlsx"
# Read the specific sheet into a data frame
kia_rewas_data <- read.xlsx(file_path, sheet = "KIA REWAS")
There are no P.I.I columns in this particular sheet. We ensure that the “date” column is parsed correctly as a date.
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Ensure the column is numeric
kia_rewas_data$date <- as.numeric(kia_rewas_data$date)
## Warning: NAs introduced by coercion
# Convert the Excel serial date to R Date
kia_rewas_data$date <- as.Date(kia_rewas_data$date, origin ="1899-12-30")
# Verify the output
head(kia_rewas_data$date)
## [1] "2014-06-08" "2014-06-08" "2014-06-08" "2014-06-08" "2014-06-08"
## [6] "2014-06-08"
We save this new sheet alongside the previous two in the Baringo workbook created
# Define the path for the existing Excel workbook
existing_file_path <- "C:/Users/AAH USER/OneDrive - Action Against Hunger USA/Documents/NDMA_DeIdentified/Baringo.xlsx"
# Load the existing workbook
wb <- loadWorkbook(existing_file_path)
# Add the KIA REWAS data to the existing workbook
addWorksheet(wb, "KIA REWAS")
writeData(wb, "KIA REWAS", kia_rewas_data)
# Save the updated workbook
saveWorkbook(wb, existing_file_path, overwrite = TRUE)
Checking for consistent ‘location names’ and ‘date’(month and year)
library(openxlsx)
file_path <- "C:/Users/AAH USER/Downloads/Baringo.xlsx"
# Read the specific sheet into a data frame
kia_dews_data <- read.xlsx(file_path, sheet = "KIA DEWS")
There are no P.I.I columns in this particular sheet. We ensure that the “InterviewDate” column is parsed correctly as a date
library(lubridate)
# Convert the numeric date to Date format
kia_dews_data$InterviewDate <- as.Date(kia_dews_data$InterviewDate, origin = "1899-12-30")
# View the first few dates to verify the conversion
head(kia_dews_data$InterviewDate)
## [1] "2016-07-07" "2016-07-15" "2016-07-15" "2016-07-14" "2016-07-16"
## [6] "2016-07-06"
We save this new sheet alongside the previous three in the Baringo workbook created
# Define the path for the existing Excel workbook
existing_file_path <- "C:/Users/AAH USER/OneDrive - Action Against Hunger USA/Documents/NDMA_DeIdentified/Baringo.xlsx"
# Load the existing workbook
wb <- loadWorkbook(existing_file_path)
# Add the KIA DEWS data to the existing workbook
addWorksheet(wb, "KIA DEWS")
writeData(wb, "KIA DEWS", kia_dews_data)
# Save the updated workbook
saveWorkbook(wb, existing_file_path, overwrite = TRUE)
library(openxlsx)
file_path <- "C:/Users/AAH USER/Downloads/Baringo.xlsx"
# Read the specific sheet into a data frame
muac_rewas_data <- read.xlsx(file_path, sheet = "MUAC REWAS")
We drop PII’s which are the “fname”, and “hhname”.
# Drop the specified PII columns
muac_rewas_data <- muac_rewas_data %>%
select(-c(fname, hhname ))
# Check the updated dataset
head(muac_rewas_data)
## district_name year admin6id child_age batchid child_sickcode district
## 1 BARINGO 2014 KE0101050101 50 1340 0 1
## 2 BARINGO 2014 KE0101050101 22 1340 0 1
## 3 BARINGO 2014 KE0101050101 40 1340 0 1
## 4 BARINGO 2014 KE0101050101 26 1340 0 1
## 5 BARINGO 2014 KE0101050101 56 1340 0 1
## 6 BARINGO 2014 KE0101050101 52 1340 0 1
## division child_sex child_sick hhaid hhamuacid item child_hh lzonehh month
## 1 155 Male No 650965 39324 1 No 1 1
## 2 155 Male Yes 650965 39323 2 No 1 1
## 3 155 Male No 650965 39322 3 Yes 1 1
## 4 155 Female No 650965 39321 4 Yes 1 1
## 5 155 Male No 650965 39320 5 Yes 1 1
## 6 155 Female No 650964 39319 6 No 1 1
## muac sacode serialno
## 1 145 382 480
## 2 134 382 480
## 3 149 382 480
## 4 152 382 480
## 5 180 382 480
## 6 140 382 479
We save this new sheet alongside the previous four in the Baringo workbook created
# Define the path for the existing Excel workbook
existing_file_path <- "C:/Users/AAH USER/OneDrive - Action Against Hunger USA/Documents/NDMA_DeIdentified/Baringo.xlsx"
# Load the existing workbook
wb <- loadWorkbook(existing_file_path)
# Add the KIA DEWS data to the existing workbook
addWorksheet(wb, "MUAC REWAS")
writeData(wb, "MUAC REWAS", muac_rewas_data)
# Save the updated workbook
saveWorkbook(wb, existing_file_path, overwrite = TRUE)
library(openxlsx)
file_path <- "C:/Users/AAH USER/Downloads/Baringo.xlsx"
# Read the specific sheet into a data frame
muac_dews_data <- read.xlsx(file_path, sheet = "MUAC DEWS")
This data set has P.I.I’s in the “ChildName” column so we will drop that.
# Drop the specified PII columns along with original coordinates
muac_dews_data <- muac_dews_data %>%
select(-ChildName)
# Check the updated dataset
head(muac_dews_data)
## MUACIndicatorID QID County SubCounty Ward LivelihoodZone Month
## 1 15648 8659 Baringo Baringo South Ilchamus Irrigated Cropping July
## 2 15649 8659 Baringo Baringo South Ilchamus Irrigated Cropping July
## 3 15650 8659 Baringo Baringo South Ilchamus Irrigated Cropping July
## 4 15651 8659 Baringo Baringo South Ilchamus Irrigated Cropping July
## 5 15652 8659 Baringo Baringo South Ilchamus Irrigated Cropping July
## 6 15653 8660 Baringo Baringo South Ilchamus Irrigated Cropping July
## Year HouseholdCode Gender MUAC MUAC_Color AgeInMonths
## 1 2016 1 Male 148 53 TRUE
## 2 2016 1 Male 139 17 TRUE
## 3 2016 1 Male 149 29 FALSE
## 4 2016 1 Male 149 53 FALSE
## 5 2016 1 Female 142 20 FALSE
## 6 2016 2 Female 144 53 TRUE
## LiveInHousehold SufferedIllnesses InterviewDate DivisionID
## 1 None 42553 1142 9
## 2 None 42553 1142 9
## 3 Diarrhea 42553 1142 9
## 4 None 42553 1142 9
## 5 Fever with chills like malaria 42553 1142 9
## 6 None 42553 1142 9
## CountyID SiteID LivelihoodZoneID
## 1 1497 4 NA
## 2 1497 4 NA
## 3 1497 4 NA
## 4 1497 4 NA
## 5 1497 4 NA
## 6 1497 4 NA
We also have to ensure that the “InterviewDate” column is parsed correctly as a date. The row values are displaced to the “SufferedIllnesses” column. We align this and proceed to parse the date correctly.
# Create a mask for rows to modify
rows_to_modify <- which(muac_dews_data$Year %in% 2016:2019)
# Ensure the columns being shifted are correctly specified
# We will create an index to specify which columns to shift
shift_columns <- c("MUAC_Color", "AgeInMonths", "LiveInHousehold",
"SufferedIllnesses", "InterviewDate",
"DivisionID", "CountyID", "SiteID", "LivelihoodZoneID")
# Create an empty data frame for the shifted values
shifted_values <- muac_dews_data[rows_to_modify, shift_columns]
# Replace the values in the original DataFrame with NA in the selected rows
muac_dews_data[rows_to_modify, shift_columns] <- NA
# Move the values one column to the right
for (i in seq_along(shift_columns)[-length(shift_columns)]) {
muac_dews_data[rows_to_modify, shift_columns[i + 1]] <- shifted_values[[i]]
}
library(lubridate)
# Ensure the column is numeric
muac_dews_data$InterviewDate <- as.numeric(muac_dews_data$InterviewDate)
# Convert the Excel serial date to R Date
muac_dews_data$InterviewDate <- as.Date(muac_dews_data$InterviewDate, origin = "1899-12-30")
# Verify the output
head(muac_dews_data$InterviewDate)
## [1] "2016-07-02" "2016-07-02" "2016-07-02" "2016-07-02" "2016-07-02"
## [6] "2016-07-02"
Save this final sheet to the existing workbook
# Define the path for the existing Excel workbook
existing_file_path <- "C:/Users/AAH USER/OneDrive - Action Against Hunger USA/Documents/NDMA_DeIdentified/Baringo.xlsx"
# Load the existing workbook
wb <- loadWorkbook(existing_file_path)
# Add the KIA DEWS data to the existing workbook
addWorksheet(wb, "MUAC DEWS")
writeData(wb, "MUAC DEWS", muac_dews_data)
# Save the updated workbook
saveWorkbook(wb, existing_file_path, overwrite = TRUE)