The following steps de-identify the NDMA MUAC data. The dataset covers the period from 2020 through June 2024.
# Load required libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
library(geosphere)
## Warning: package 'geosphere' was built under R version 4.3.3
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
## (status 2 uses the sf package in place of rgdal)
library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.3.3
# Location of the raw NDMA workbook on the local machine
file_path <- "C:/Users/AAH USER/Downloads/03_MUAC_NDMA_2020 to June 2024.xlsx"
# Load only the worksheet named "MUAC" from that workbook as a data frame
muac_2020_2024 <- openxlsx::read.xlsx(xlsxFile = file_path, sheet = "MUAC")
This dataset contains personally identifiable information (PII) in the “ChildName” column, so we drop that column, ensure the InterviewDate column is parsed correctly as a date, and then save the resulting sheet to a workbook.
# De-identify and tidy in one pipeline:
#   1. remove the PII column (ChildName);
#   2. coerce InterviewDate to numeric and convert it to a Date, counting days
#      from 1899-12-30 (the origin used for Excel serial dates on Windows).
muac_2020_2024 <- muac_2020_2024 %>%
  select(-ChildName) %>%
  mutate(
    InterviewDate = as.Date(as.numeric(InterviewDate), origin = "1899-12-30")
  )
# Preview the first rows to confirm the column is gone and the dates parsed
head(muac_2020_2024)
## MUACIndicatorID QID County SubCounty Ward
## 1 1012350 344618 Tharaka Nithi Tharaka North Gatunga
## 2 1012351 344619 Tharaka Nithi Tharaka North Gatunga
## 3 1012352 344617 Tharaka Nithi Tharaka North Gatunga
## 4 1012353 344622 Tharaka Nithi Tharaka North Gatunga
## 5 1012356 344620 Tharaka Nithi Tharaka North Gatunga
## 6 1012575 344689 Kwale Kinango Chengoni-Samburu
## LivelihoodZone Month Year HouseholdCode Gender MUAC MUAC_Color
## 1 Marginal Mixed Farming January 2022 TNC0321 <NA> NA <NA>
## 2 Marginal Mixed Farming January 2022 TNC0326 <NA> NA <NA>
## 3 Marginal Mixed Farming January 2022 TNC0322 Female NA Green
## 4 Marginal Mixed Farming January 2022 TNC0329 Female NA Green
## 5 Marginal Mixed Farming January 2022 TNC0330 <NA> NA <NA>
## 6 Livestock farming January 2022 KWL0601 Female 140 Green
## AgeInMonths LiveInHousehold SufferedIllnesses InterviewDate DivisionID
## 1 NA TRUE <NA> 2022-01-02 41
## 2 NA TRUE <NA> 2022-01-02 41
## 3 43 TRUE <NA> 2022-01-02 41
## 4 45 TRUE <NA> 2022-01-02 41
## 5 NA TRUE <NA> 2022-01-02 41
## 6 21 FALSE <NA> 2022-01-03 107
## CountyID SiteID LivelihoodZoneID
## 1 12 4 6
## 2 12 4 6
## 3 12 4 6
## 4 12 4 6
## 5 12 4 6
## 6 20 222 13
We save this de-identified MUAC sheet to a newly created workbook.
# Destination file for the de-identified output
new_file_path <- "C:/Users/AAH USER/OneDrive - Action Against Hunger USA/Documents/NDMA_DeIdentified/MUAC_2020_June 2024.xlsx"
# Start from an empty workbook object
wb <- createWorkbook()
# Create a sheet called "MUAC" and write the cleaned data frame into it
addWorksheet(wb, sheetName = "MUAC")
writeData(wb, sheet = "MUAC", x = muac_2020_2024)
# Write the workbook to disk, replacing any existing file at that path
saveWorkbook(wb, file = new_file_path, overwrite = TRUE)