Description
This tools accepts a Baseline dataset as input and it generates the required Househould sampling size for each Location
Main functions
The sample size function implements the formula to calculate the sample size for a Chi squared distribution with 1 degree of freedom. see Sample Size Table.
# ONLY for Notebook: do not run chunks when knitting notebook. Applies to all chunks
knitr::opts_chunk$set(eval = FALSE)
# function starts here
sample_size <- function(n, cl, prop, me){
chi_inv <- qchisq(p = cl, df = 1)
out <- (chi_inv * n * prop * (1 - prop)) / ((me^2 * (n - 1)) + (chi_inv * prop * (1 - prop)))
n_sample <- as.integer(round_0(out))
return(n_sample)
}
Configuration
Set the data and output directories and the parameters for the sampling size formula: margin of error (10%), population proportion (50%), confidence level (95%) and average household size (5)
# project data directories
data_dir <- here("R", "sample-calculator", "data")
output_dir <- here("R", "sample-calculator", "output")
# sampling parameters
margin_error <- 0.1
pop_prop <- 0.50
conf_level <- 0.95
hh_size <- 5
Data Load
Define the data file containing the Baseline dataset to be loaded. Use read.xlsx() from the openxlsx library with the detectDates option to FALSE as we do not need dates here and sometimes can create type conversion issues in case the date is not properly detected by the read.xlsx() function.
# target file
datafile <- "05 MNM May 2019 - Shared Dataset.xlsx"
filepath <- paste0(data_dir, "/", datafile)
filename <- filename(datafile)
# load Baseline dataset
Baseline_dataset <- read.xlsx(filepath, sheet = 3, detectDates = FALSE, check.names = TRUE)
Process
- Calculate new colum with number of Households HHs
- Group by Nahya.PCODE
- Calculate summarised data for each Nahya. In order to keep those columns which are not summarised in the result, use
first() to pick up the first value (as it is the same over the whole column for a given Nahya)
- Use
everything() inside a select() in order to choose all the columns or the remaining columns after selecting specific ones
- Perform a
left_join() with the Baseline_dataset using Nahya.PCODE as index, in order to obtain all the rows. The previous summarised results will be repeated across the column for all the rows belonging to a given Nahya
- Use
select(-ends_with(".y")) to remove (unselect) those columns ending with a given string. Notice the - sign to indicate removal.
- Find those columns ending with a given string, and rename them replacing that string for an empty string:
- Notice the use of (.) on
str_replace(), as the function requires a dataframe input. In this case (.) refers to the current dataframe output from the dplyr pipeline
rename_at(vars(ends_with(".x")), list(~str_replace(.,".x", "")))
- The deprecated way uses
function instead of list
rename_at(vars(ends_with(".x")), function(str_replace(.,".x", "")))
mutate() is used to create new columns based on calculations performed columnwise
HH_sampling <- Baseline_dataset %>%
select(Mohafaza, Mantika.PCODE, Mantika, Nahya.PCODE, Nahya, IND = Total.Population) %>%
mutate(HHs = round_0(IND / hh_size)) %>%
group_by(Nahya.PCODE) %>%
summarise(Mohafaza = first(Mohafaza),
Mantika.PCODE = first(Mantika.PCODE),
Mantika = first(Mantika),
Nahya = first(Nahya),
Total_IND = sum(IND, na.rm = TRUE),
Total_HHs = sum(HHs, na.rm = TRUE),
Sample_Size = sample_size(Total_HHs, conf_level, pop_prop, margin_error)) %>%
select(Mohafaza, Mantika.PCODE, Mantika, Nahya.PCODE, everything()) %>%
left_join(Baseline_dataset, by = "Nahya.PCODE") %>%
select(-ends_with(".y")) %>%
select(-ends_with(".IND")) %>%
rename_at(vars(ends_with(".x")), list(~str_replace(.,".x", ""))) %>%
mutate(`Total HH` = round_0(Total.Population / hh_size), `HH to be assesed` = round_0(Sample_Size * `Total HH` / Total_HHs)) %>%
select(Mohafaza, Mantika.PCODE, Mantika, Nahya.PCODE, Nahya, Location.PCODE, Location.Name, `Total Individual`= Total.Population, `Total HH`, `D HH Total`= Total_HHs, `Master HH sample size`= Sample_Size, `HH to be assesed`)
Output
Export the result as an Excel table. Use paste0() to combine strings in order to generate the output filepath for the resulting Excel file.
# export result into excel
output_file <- paste0(output_dir, "/", filename, " - HH Sampling.xlsx")
wb <- createWorkbook()
addWorksheet(wb, "HH Sample Size")
writeDataTable(wb, sheet = 1, HH_sampling)
saveWorkbook(wb, file = output_file, overwrite = TRUE)
LS0tCnRpdGxlOiAiU2FtcGxlIFNpemUgQ2FsY3VsYXRvciIKb3V0cHV0OgogIHBkZl9kb2N1bWVudDogZGVmYXVsdAogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKLS0tCiMjIyBEZXNjcmlwdGlvbgpUaGlzIHRvb2xzIGFjY2VwdHMgYSBCYXNlbGluZSBkYXRhc2V0IGFzIGlucHV0IGFuZCBpdCBnZW5lcmF0ZXMgdGhlIHJlcXVpcmVkIEhvdXNlaG91bGQgc2FtcGxpbmcgc2l6ZSBmb3IgZWFjaCBMb2NhdGlvbgoKIyMjIE1haW4gZnVuY3Rpb25zCgpUaGUgKipzYW1wbGUgc2l6ZSoqIGZ1bmN0aW9uIGltcGxlbWVudHMgdGhlIGZvcm11bGEgdG8gY2FsY3VsYXRlIHRoZSBzYW1wbGUgc2l6ZSBmb3IgYSAqQ2hpIHNxdWFyZWQgZGlzdHJpYnV0aW9uKiB3aXRoIDEgZGVncmVlIG9mIGZyZWVkb20uIFtzZWUgU2FtcGxlIFNpemUgVGFibGVdKGh0dHBzOi8vd3d3LnJlc2VhcmNoLWFkdmlzb3JzLmNvbS90b29scy9TYW1wbGVTaXplLmh0bSkuCgpgYGB7cn0KIyBPTkxZIGZvciBOb3RlYm9vazogZG8gbm90IHJ1biBjaHVua3Mgd2hlbiBrbml0dGluZyBub3RlYm9vay4gQXBwbGllcyB0byBhbGwgY2h1bmtzCmtuaXRyOjpvcHRzX2NodW5rJHNldChldmFsID0gRkFMU0UpCiMgZnVuY3Rpb24gc3RhcnRzIGhlcmUKc2FtcGxlX3NpemUgPC0gZnVuY3Rpb24obiwgY2wsIHByb3AsIG1lKXsKICAKICBjaGlfaW52IDwtIHFjaGlzcShwID0gY2wsIGRmID0gMSkKICBvdXQgPC0gKGNoaV9pbnYgKiBuICogcHJvcCAqICgxIC0gcHJvcCkpIC8gKChtZV4yICogKG4gLSAxKSkgKyAoY2hpX2ludiAqIHByb3AgKiAoMSAtIHByb3ApKSkKICBuX3NhbXBsZSA8LSBhcy5pbnRlZ2VyKHJvdW5kXzAob3V0KSkKICByZXR1cm4obl9zYW1wbGUpCiAgCn0KYGBgCgojIyMgQ29uZmlndXJhdGlvbgoKU2V0IHRoZSAqZGF0YSogYW5kICpvdXRwdXQqIGRpcmVjdG9yaWVzIGFuZCB0aGUgcGFyYW1ldGVycyBmb3IgdGhlIHNhbXBsaW5nIHNpemUgZm9ybXVsYTogbWFyZ2luIG9mIGVycm9yICgxMCUpLCBwb3B1bGF0aW9uIHByb3BvcnRpb24gKDUwJSksIGNvbmZpZGVuY2UgbGV2ZWwgKDk1JSkgYW5kIGF2ZXJhZ2UgaG91c2Vob2xkIHNpemUgKDUpCmBgYHtyfQojIHByb2plY3QgZGF0YSBkaXJlY3RvcmllcwpkYXRhX2RpciA8LSBoZXJlKCJSIiwgInNhbXBsZS1jYWxjdWxhdG9yIiwgImRhdGEiKQpvdXRwdXRfZGlyIDwtIGhlcmUoIlIiLCAic2FtcGxlLWNhbGN1bGF0b3IiLCAib3V0cHV0IikKCiMgc2FtcGxpbmcgcGFyYW1ldGVycwptYXJnaW5fZXJyb3IgPC0gMC4xCnBvcF9wcm9wIDwtIDAuNTAKY29uZl9sZXZlbCA8LSAwLjk1CmhoX3NpemUgPC0gNQpgYGAKCiMjIyBEYXRhIExvYWQKCkRlZmluZSB0aGUgZGF0YSBmaWxlIGNvbnRhaW5pbmcgdGhlIEJhc2VsaW5lIGRhdGFzZXQgdG8gYmUgbG9hZGVkLiBVc2UgYHJlYWQueGxzeCgpYCBmcm9tIHRoZSAqKm9wZW54bHN4KiogbGlicmFyeSB3aXRoIHRoZSBgZGV0ZWN0RGF0ZXNgIG9wdGlvbiB0byBgRkFMU0VgIGFzIHdlIGRvIG5vdCBuZWVkIGRhdGVzIGhlcmUgYW5kIHNvbWV0aW1lcyBjYW4gY3JlYXRlIHR5cGUgY29udmVyc2lvbiBpc3N1ZXMgaW4gY2FzZSB0aGUgZGF0ZSBpcyBub3QgcHJvcGVybHkgZGV0ZWN0ZWQgYnkgdGhlIGByZWFkLnhsc3goKWAgZnVuY3Rpb24uCgpgYGB7cn0KIyB0YXJnZXQgZmlsZQpkYXRhZmlsZSA8LSAiMDUgTU5NIE1heSAyMDE5IC0gU2hhcmVkIERhdGFzZXQueGxzeCIKZmlsZXBhdGggPC0gcGFzdGUwKGRhdGFfZGlyLCAiLyIsIGRhdGFmaWxlKQpmaWxlbmFtZSA8LSBmaWxlbmFtZShkYXRhZmlsZSkKCiMgbG9hZCBCYXNlbGluZSBkYXRhc2V0CkJhc2VsaW5lX2RhdGFzZXQgPC0gcmVhZC54bHN4KGZpbGVwYXRoLCBzaGVldCA9IDMsIGRldGVjdERhdGVzID0gRkFMU0UsIGNoZWNrLm5hbWVzID0gVFJVRSkKYGBgCgojIyMgUHJvY2VzcwoKKEApIENhbGN1bGF0ZSBuZXcgY29sdW0gd2l0aCBudW1iZXIgb2YgSG91c2Vob2xkcyAqKkhIcyoqCihAKSBHcm91cCBieSAqKk5haHlhLlBDT0RFKioKKEApIENhbGN1bGF0ZSBzdW1tYXJpc2VkIGRhdGEgZm9yIGVhY2ggKipOYWh5YSoqLiBJbiBvcmRlciB0byBrZWVwIHRob3NlIGNvbHVtbnMgd2hpY2ggYXJlIG5vdCBzdW1tYXJpc2VkIGluIHRoZSByZXN1bHQsIHVzZSBgZmlyc3QoKWAgdG8gcGljayB1cCB0aGUgZmlyc3QgdmFsdWUgKGFzIGl0IGlzIHRoZSBzYW1lIG92ZXIgdGhlIHdob2xlIGNvbHVtbiBmb3IgYSBnaXZlbiAqTmFoeWEqKQooQCkgVXNlIGBldmVyeXRoaW5nKClgIGluc2lkZSBhIGBzZWxlY3QoKWAgaW4gb3JkZXIgdG8gY2hvb3NlIGFsbCB0aGUgY29sdW1ucyBvciB0aGUgcmVtYWluaW5nIGNvbHVtbnMgYWZ0ZXIgc2VsZWN0aW5nIHNwZWNpZmljIG9uZXMKKEApIFBlcmZvcm0gYSBgbGVmdF9qb2luKClgIHdpdGggdGhlICpCYXNlbGluZV9kYXRhc2V0KiB1c2luZyAqKk5haHlhLlBDT0RFKiogYXMgaW5kZXgsIGluIG9yZGVyIHRvIG9idGFpbiBhbGwgdGhlIHJvd3MuIFRoZSBwcmV2aW91cyBzdW1tYXJpc2VkIHJlc3VsdHMgd2lsbCBiZSByZXBlYXRlZCBhY3Jvc3MgdGhlIGNvbHVtbiBmb3IgYWxsIHRoZSByb3dzIGJlbG9uZ2luZyB0byBhIGdpdmVuICoqTmFoeWEqKgooQCkgVXNlIGBzZWxlY3QoLWVuZHNfd2l0aCgiLnkiKSlgIHRvIHJlbW92ZSAodW5zZWxlY3QpIHRob3NlIGNvbHVtbnMgZW5kaW5nIHdpdGggYSBnaXZlbiBzdHJpbmcuIE5vdGljZSB0aGUgKiotKiogc2lnbiB0byBpbmRpY2F0ZSByZW1vdmFsLgooQCkgRmluZCB0aG9zZSBjb2x1bW5zIGVuZGluZyB3aXRoIGEgZ2l2ZW4gc3RyaW5nLCBhbmQgcmVuYW1lIHRoZW0gcmVwbGFjaW5nIHRoYXQgc3RyaW5nIGZvciBhbiBlbXB0eSBzdHJpbmc6CiAgICArIE5vdGljZSB0aGUgdXNlIG9mICguKSBvbiBgc3RyX3JlcGxhY2UoKWAsIGFzIHRoZSBmdW5jdGlvbiByZXF1aXJlcyBhIGRhdGFmcmFtZSBpbnB1dC4gSW4gdGhpcyBjYXNlICguKSByZWZlcnMgdG8gdGhlIGN1cnJlbnQgZGF0YWZyYW1lIG91dHB1dCBmcm9tIHRoZSAqZHBseXIqIHBpcGVsaW5lCiAgICArIGByZW5hbWVfYXQodmFycyhlbmRzX3dpdGgoIi54IikpLCBsaXN0KH5zdHJfcmVwbGFjZSguLCIueCIsICIiKSkpYAogICAgKyBUaGUgZGVwcmVjYXRlZCB3YXkgdXNlcyBgZnVuY3Rpb25gIGluc3RlYWQgb2YgYGxpc3RgCiAgICArIGByZW5hbWVfYXQodmFycyhlbmRzX3dpdGgoIi54IikpLCBmdW5jdGlvbihzdHJfcmVwbGFjZSguLCIueCIsICIiKSkpYAooQCkgYG11dGF0ZSgpYCBpcyB1c2VkIHRvIGNyZWF0ZSBuZXcgY29sdW1ucyBiYXNlZCBvbiBjYWxjdWxhdGlvbnMgcGVyZm9ybWVkICpjb2x1bW53aXNlKgoKYGBge3J9CkhIX3NhbXBsaW5nIDwtIEJhc2VsaW5lX2RhdGFzZXQgJT4lIAogIHNlbGVjdChNb2hhZmF6YSwgTWFudGlrYS5QQ09ERSwgTWFudGlrYSwgTmFoeWEuUENPREUsIE5haHlhLCBJTkQgPSBUb3RhbC5Qb3B1bGF0aW9uKSAlPiUgCiAgbXV0YXRlKEhIcyA9IHJvdW5kXzAoSU5EIC8gaGhfc2l6ZSkpICU+JSAKICBncm91cF9ieShOYWh5YS5QQ09ERSkgJT4lIAogIHN1bW1hcmlzZShNb2hhZmF6YSA9IGZpcnN0KE1vaGFmYXphKSwKICAgICAgICAgICAgTWFudGlrYS5QQ09ERSA9IGZpcnN0KE1hbnRpa2EuUENPREUpLAogICAgICAgICAgICBNYW50aWthID0gZmlyc3QoTWFudGlrYSksCiAgICAgICAgICAgIE5haHlhID0gZmlyc3QoTmFoeWEpLAogICAgICAgICAgICBUb3RhbF9JTkQgPSBzdW0oSU5ELCBuYS5ybSA9IFRSVUUpLAogICAgICAgICAgICBUb3RhbF9ISHMgPSBzdW0oSEhzLCBuYS5ybSA9IFRSVUUpLAogICAgICAgICAgICBTYW1wbGVfU2l6ZSA9IHNhbXBsZV9zaXplKFRvdGFsX0hIcywgY29uZl9sZXZlbCwgcG9wX3Byb3AsIG1hcmdpbl9lcnJvcikpICU+JSAKICBzZWxlY3QoTW9oYWZhemEsIE1hbnRpa2EuUENPREUsIE1hbnRpa2EsIE5haHlhLlBDT0RFLCBldmVyeXRoaW5nKCkpICU+JSAKICBsZWZ0X2pvaW4oQmFzZWxpbmVfZGF0YXNldCwgYnkgPSAiTmFoeWEuUENPREUiKSAlPiUgCiAgc2VsZWN0KC1lbmRzX3dpdGgoIi55IikpICU+JSAKICBzZWxlY3QoLWVuZHNfd2l0aCgiLklORCIpKSAlPiUgCiAgcmVuYW1lX2F0KHZhcnMoZW5kc193aXRoKCIueCIpKSwgbGlzdCh+c3RyX3JlcGxhY2UoLiwiLngiLCAiIikpKSAlPiUKICBtdXRhdGUoYFRvdGFsIEhIYCA9IHJvdW5kXzAoVG90YWwuUG9wdWxhdGlvbiAvIGhoX3NpemUpLCBgSEggdG8gYmUgYXNzZXNlZGAgPSByb3VuZF8wKFNhbXBsZV9TaXplICogYFRvdGFsIEhIYCAvIFRvdGFsX0hIcykpICU+JSAKICBzZWxlY3QoTW9oYWZhemEsIE1hbnRpa2EuUENPREUsIE1hbnRpa2EsIE5haHlhLlBDT0RFLCBOYWh5YSwgTG9jYXRpb24uUENPREUsIExvY2F0aW9uLk5hbWUsIGBUb3RhbCBJbmRpdmlkdWFsYD0gVG90YWwuUG9wdWxhdGlvbiwgYFRvdGFsIEhIYCwgYEQgSEggVG90YWxgPSBUb3RhbF9ISHMsIGBNYXN0ZXIgSEggc2FtcGxlIHNpemVgPSBTYW1wbGVfU2l6ZSwgYEhIIHRvIGJlIGFzc2VzZWRgKQpgYGAKCiMjIyBPdXRwdXQKCkV4cG9ydCB0aGUgcmVzdWx0IGFzIGFuICpFeGNlbCB0YWJsZSouIFVzZSBgcGFzdGUwKClgIHRvIGNvbWJpbmUgc3RyaW5ncyBpbiBvcmRlciB0byBnZW5lcmF0ZSB0aGUgb3V0cHV0IGZpbGVwYXRoIGZvciB0aGUgcmVzdWx0aW5nIEV4Y2VsIGZpbGUuCgpgYGB7cn0KIyBleHBvcnQgcmVzdWx0IGludG8gZXhjZWwKb3V0cHV0X2ZpbGUgPC0gcGFzdGUwKG91dHB1dF9kaXIsICIvIiwgZmlsZW5hbWUsICIgLSBISCBTYW1wbGluZy54bHN4IikKd2IgPC0gY3JlYXRlV29ya2Jvb2soKQphZGRXb3Jrc2hlZXQod2IsICJISCBTYW1wbGUgU2l6ZSIpCndyaXRlRGF0YVRhYmxlKHdiLCBzaGVldCA9IDEsIEhIX3NhbXBsaW5nKQpzYXZlV29ya2Jvb2sod2IsIGZpbGUgPSBvdXRwdXRfZmlsZSwgb3ZlcndyaXRlID0gVFJVRSkKYGBgCgo=