This provides steps and code for data cleaning, management, and analysis of Continuity of Essential Health Services assessment survey. See here for the questionnare. (Standard module version as of 3/22/2021)
This code:
1. Imports and cleans Continuity of Essential Health Services (aka "EHS") dataset from Lime Survey (i.e., green and blue tabs in the chartbook),
2. Creates field check tables for data quality monitoring, and
3. Creates indicator estimate data for dashboards and chartbook (purple tab in the chartbook).
FOUR parts must be updated, based on the minmum country-specific adaptation.
1. Directories and local macro in A. SETTING per survey implementation information
2. Local macro in E.1. Country speciic code local per section 1
3. Staff variables in E.2. Construct analysis variables per section 2
4. Option for sampling weights in E.3. Merge with sampling weight per sentinel facility selection design
Depending on the extent of additional country-specific adaptation, further revise E.2. Construct analysis variables.
(Last updated: 2021-07-01 20:21:38)
Note: For the URL, we need to use part of the country overview page for the data server. For example, suppose the overview page link looks like this for a country named YYY:
https://extranet.who.int/dataformv3/index.php/plugins/direct?plugin=CountryOverview&country=YYY&password=XXXXXXXXX.
Replace part of the link before plugins
with the part in the country-specific link. So, in this example, the code should be:
dtaraw<-read.csv(paste0("https://extranet.who.int/dataformv3/index.php/plugins/direct?plugin=CountryOverview&docType=1&sid=",surveyid,"&language=en&function=createExport"))
As opposed to the above:
dtaraw<-read.csv(paste0("https://who.my-survey.host/index.php/plugins/direct?plugin=CountryOverview&docType=1&sid=",surveyid,"&language=en&function=createExport"))
As of 2021-07-01 20:21:38, the downloaded raw data has 81 observations and 646 variables.
Then, export the raw data into the chartbook (green tab: "Facility-level raw data") - as is.
Assess duplicate rows based on facility code: Q101. Among the duplicate rows, keep the latest row, based on the submission date/time.
A total of 2 duplicate rows from 1 unique facilities were identified based on Q101. Now keeping only one unique row per facility, the dataset has 80 observations.
Check variables section by scion, drop prefix "A" in categorical/character variables, and convert to numeric.
The following is value labels.
#delimit;
lab define q104
1"1.Urban"
2"2.Rural";
lab values q104 q104;
lab define q105
1"1.Level2: Dispensary"
2"2.Level3: Health Centre"
3"3.Level4: Primary Hospital"
4"4.Level5: Secondary level hospital and above"
5"5.Level6: Tertiary level" ;
lab values q105 q105; /*corresponding to the model example context*/
lab define q106
1"1.Government"
2"2.Private"
3"3.NGO"
4"4.FBO"
5"5.Other";
lab values q106 q106; /*corresponding to the model example context*/
lab define q302
1"1. Yes - user fees exempted only for COVID-19 services"
2"2. Yes - user fees exempted only for other health services"
3"3. Yes - user fees exempted for both COVID-19 and other health services"
4"4. No";
lab values q302 q302;
lab define q305
1"1. Yes - for COVID-19 case management services"
2"2. Yes - for other essential health services"
3"3. Yes - for both COVID-19 and other services"
4"4. No"
5"5. Do not know";
lab values q305 q305;
lab define change /*KECT - q409a is weird (asks if all increased, all decreased, no changes, mixed), so it cannot be included here, changed from q409* to q409_* to address this issue*/
1"1.Yes, increased"
2"2.Yes, decreased"
3"3.No"
4"4.N/A";
foreach var of varlist q409_* q412* q414 q415 q417* q420*{;
lab values `var' change;
};
lab define q409a
1"1.Yes, increased in all areas"
2"2.Yes, decreased in all areas"
3"3.Yes, increased in some but decreased in some"
4"4.No changes in any";
foreach var of varlist q409a {;
lab values `var' q409a;
};
lab define q417
1"1.Yes, less frequent"
2"2.Yes, suspended"
3"3.No change"
4"4.yes, increased"
5"5.N/A";
foreach var of varlist q417* {;
lab values `var' q417;
};
lab define q420
1"1.Yes, planned & implemented"
2"2.Yes, planned but not yet implemented"
3"3.No"
4"4.N/A";
foreach var of varlist q420* {;
lab values `var' q420;
};
lab define q421
1"1.Not at all"
2"2.Slightly"
3"3.Moderately"
4"4.Quite a lot"
5"5.A great deal";
foreach var of varlist q421* {;
lab values `var' q421;
};
lab define ppe
1"1.Currently available for all health workers"
2"2.Currently available only for some health workers"
3"3.Currently unavailable for any health workers"
4"4.Not applicable - never procured or provided" ;
foreach var of varlist q507* {;
lab values `var' ppe;
};
lab define q604
1"1.Yes, PCR"
2"2.Yes, RDT"
3"2.Yes, PCR & RDT"
4"5.No";
lab values q604 q604;
lab define availfunc
1"1.Available, functional"
2"2.Available, but not functional"
3"3.Not available";
foreach var of varlist q802_* q803_* q805_* q903 q904 {;
lab values `var' availfunc ;
};
lab define icepack
1"1.Yes, a set of ice packs for all cold boxes"
2"1.Yes, a set of ice packs only for some cold boxes"
3"3.No";
foreach var of varlist q907 q910 {;
lab values `var' icepack ;
};
lab define icepackfreeze
1"1.All"
2"2.Only some"
3"3.None-no functional freezer" ;
lab values q911 icepackfreeze ;
lab define yesno 1"1.Yes" 0"0.No";
foreach var of varlist
q202 q205_* q206 q207*
q301 q304 q306_* q307 q308 q310
q402 - q405 q406_* q407 q408 q410_* q411_* q416 q418 q419
q501 q502 q503_* q504 q505_* q506
q601 q602 q603 q605 q606 q609 q610 q614-q615
q701_* q702_* q703_* q704
q801 q804
q901 q902 q905 q908 q912
{;
labe values `var' yesno;
};
lab define yesnona
1"1.Yes"
2"2.No"
3"3.N/A";
foreach var of varlist q204 q309 {;
labe values `var' yesnona;
};
lab define alwayssometimesnever
1"1.Always"
2"2.Sometimes"
3"3.Never";
foreach var of varlist q607_* q608_* {;
labe values `var' alwayssometimesnever;
};
#delimit cr
As of 2021-07-02, the following are "field check tables." In Stata program, xls file is created. In R, the results are directly presented in this markdown file.
Assess interview characteristics among all interviews.
Assess level of missing responses among COMPLETED interviews.
Update code here based on the questionnaire in the country
Created analysis variables section by section.
* give prefix z for background characteristics, which can be used as analysis strata
* give prefix x for binary variables, which will be used to calculate percentage
* give prefix y for integer/continuous variables, which will be used to calculate total number
#####* Section 5: IPC
dta<-dta%>%
mutate(
xipcpp= q501==1,
xsafe= q502==1,
xguideline= q504==1,
xppe=q506==1)%>%
mutate_if(is.logical, as.numeric)%>%
mutate_at(vars(xipcpp, xsafe, xguideline, xppe),
~replace(., is.na(.)==TRUE, 0))%>%
rename_all(.funs = funs(sub("q503_", "xsafe__", .)))%>%
rename_all(.funs = funs(sub("q505_", "xguideline__", .)))%>%
mutate_at(vars(starts_with("xsafe__"), starts_with("xguideline__")),
~replace(., is.na(.)==TRUE, 0))
varlist<-dta%>%select(starts_with("xsafe__"))%>%colnames()
dta<-dta%>%
mutate(
max=11,
temp=rowSums(dta[ , varlist], na.rm=TRUE),
xsafe_score =100*(temp/max),
xsafe_100 =xsafe_score>=100,
xsafe_50 =xsafe_score>=50
)
varlist<-dta%>%select(starts_with("xguideline__"))%>%colnames()
dta<-dta%>%
mutate(
max=6,
temp=rowSums(dta[colnames(select(dta, starts_with("xguideline__")))],
na.rm=TRUE ),
xguideline_score =100*(temp/max),
xguideline_100 =xguideline_score>=100,
xguideline_50 =xguideline_score>=50
)
Question 1: What if there are seven guidelines, as opposed to six in the standard questionnaire?
dta<-dta%>%
mutate(
# CHANGE max
max=7,
temp=rowSums(dta[colnames(select(dta, starts_with("xguideline__")))],
na.rm=TRUE ),
xguidelineA_score =100*(temp/max),
xguidelineA_100 =xguidelineA_score>=100,
xguidelineA_50 =xguidelineA_score>=50
)
summary(dta$xguideline_score)
Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00 25.00 83.33 60.83 100.00 100.00
summary(dta$xguidelineA_score)#NEW
Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00 21.43 71.43 52.14 85.71 85.71
Question 2: What if we want to calculate percent/proportion of sentinel facilities that scored 90 and 70 out of the possible score of 100?
dta<-dta%>%
mutate(
# CHANGE cutoff value
xguideline_90 =xguideline_score>=90,
xguideline_70 =xguideline_score>=70
)%>%
mutate_if(is.logical, as.numeric)
summary(dta$xguideline_100)
Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00 0.00 0.00 0.35 1.00 1.00
summary(dta$xguideline_90)#NEW
Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00 0.00 0.00 0.35 1.00 1.00
summary(dta$xguideline_70)#NEW
Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00 0.00 1.00 0.55 1.00 1.00
summary(dta$xguideline_50)
Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00 0.00 1.00 0.65 1.00 1.00
Question 3: What if we want to know percent/proportion of sentinel facilities that have the first two guidelines, as opposed to all guidelines?
#table(dta$xguideline__001, dta$xguideline__002)
dta<-dta%>%
mutate(
xguideline_firsttwo = (xguideline__001==1 & xguideline__002==1)
)%>%
mutate_if(is.logical, as.numeric)
summary(dta$xguideline_firsttwo)
Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0 0.0 1.0 0.7 1.0 1.0
Question 4: What if we want to create a new variable, the number of guidelines available at the facility?
dta<-dta%>%
mutate(
xnum_guide = xguideline__001+xguideline__002+xguideline__003+
xguideline__004+xguideline__005+xguideline__006,
#which is same with the following
xnum_guideA=rowSums(dta[colnames(select(dta, starts_with("xguideline__")))],
na.rm=TRUE )
)
summary(dta$xnum_guide)
Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00 1.50 5.00 3.65 6.00 6.00
summary(dta$xnum_guideA)
Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00 1.50 5.00 3.65 6.00 6.00
Question 5: What if we want to count the total number of sentinel facilities that have 4 or more guidelines?
dta<-dta%>%
mutate(
#first variable name starts with "y"
ynum_guide4=xnum_guide>=4
)%>%
mutate_if(is.logical, as.numeric)
#table(dta$xnum_guide, dta$ynum_guide4)
# AND make sure "dtasummaryy" includes "ynum" in F.1.
Select Option A (if threre are sampling weights) or Option B (if there is no sampling weight).
Option A
Option B
dtatemp<-dta%>%
mutate(
obs=1,
obs_userfee=NA,
obs_ipt=NA,
obs_er=NA,
obs_vac=NA,
obs_covax=NA,
obs_primary=NA,
obs_hospital=NA,
obs_userfee=ifelse( xuserfee==1, 1, obs_userfee),
obs_ipt=ifelse( xipt==1, 1, obs_ipt),
obs_er=ifelse( xer==1, 1, obs_er),
obs_vac=ifelse( xvac==1, 1, obs_vac),
obs_covax=ifelse( xcovax==1, 1, obs_covax),
obs_primary=ifelse( zlevel_low==1, 1, obs_primary),
obs_hospital=ifelse( zlevel_low==0, 1, obs_hospital),
obshmis_opt=rowSums(dta[colnames(select(dta, starts_with("vol_opt_")))], na.rm=TRUE )>0 ,
obshmis_ipt=rowSums(dta[colnames(select(dta, starts_with("vol_ipt_")))], na.rm=TRUE )>0 ,
obshmis_del=rowSums(dta[colnames(select(dta, starts_with("vol_del_")))], na.rm=TRUE )>0 ,
obshmis_dpt=rowSums(dta[colnames(select(dta, starts_with("vol_dpt_")))], na.rm=TRUE )>0
)
dtatempx<-dtatemp%>%select(country, round, month, year, weight, starts_with("z"),
starts_with("x"))
dtatempy<-dtatemp%>%
select(country, round, month, year, weight, starts_with("z"),
starts_with("obs"), starts_with("y"), starts_with("staff"),
starts_with("vol"))%>%
mutate_if(is.logical, as.numeric)
# Among all facilities
dtasummaryx<-dtatempx%>%
group_by(country, round, month, year)%>%
summarize_at(vars(starts_with("x")),
funs(weighted.mean(., weight, na.rm = TRUE)))%>%
ungroup()%>%
mutate(group="All", grouplabel="")
dtasummaryy<-dtatempy%>%
group_by(country, round, month, year)%>%
summarize_at(vars(starts_with("obs"), starts_with("yvac"), starts_with("ynum"),
starts_with("staff"), starts_with("vol")),
funs(sum(.*weight, na.rm = TRUE)))%>%
ungroup()%>%
mutate(group="All", grouplabel="")
dtasummaryall<-left_join(dtasummaryx, dtasummaryy,
by = c("country", "round", "month", "year", "group", "grouplabel"))
# By residential area
dtasummaryx<-dtatempx%>%
group_by(country, round, month, year, zurban)%>%
summarize_at(vars(starts_with("x")),
funs(weighted.mean(., weight, na.rm = TRUE)))%>%
ungroup()%>%
mutate(
group="Location",
grouplabel="",
grouplabel= ifelse(zurban==0, "1.1 Rural" , grouplabel),
grouplabel= ifelse(zurban==1, "1.2 Urban" , grouplabel) )
dtasummaryy<-dtatempy%>%
group_by(country, round, month, year, zurban)%>%
summarize_at(vars(starts_with("obs"), starts_with("yvac"), starts_with("ynum"),
starts_with("staff"), starts_with("vol")),
funs(sum(.*weight, na.rm = TRUE)))%>%
ungroup()%>%
mutate(
group="Location",
grouplabel="",
grouplabel= ifelse(zurban==0, "1.1 Rural" , grouplabel),
grouplabel= ifelse(zurban==1, "1.2 Urban" , grouplabel) )
dtasummarylocation<-left_join(dtasummaryx, dtasummaryy,
by = c("country", "round", "month", "year", "group", "grouplabel"))%>%
select(-starts_with("z"))
# By facility type
dtasummaryx<-dtatempx%>%
group_by(country, round, month, year, zlevel_hospital)%>%
summarize_at(vars(starts_with("x")),
funs(weighted.mean(., weight, na.rm = TRUE)))%>%
ungroup()%>%
mutate(
group="Level",
grouplabel="",
grouplabel= ifelse(zlevel_hospital==0, "2.1 Non-hospitals" , grouplabel),
grouplabel= ifelse(zlevel_hospital==1, "2.2 Hospitals" , grouplabel) )
dtasummaryy<-dtatempy%>%
group_by(country, round, month, year, zlevel_hospital)%>%
summarize_at(vars(starts_with("obs"), starts_with("yvac"), starts_with("ynum"),
starts_with("staff"), starts_with("vol")),
funs(sum(.*weight, na.rm = TRUE)))%>%
ungroup()%>%
mutate(
group="Level",
grouplabel="",
grouplabel= ifelse(zlevel_hospital==0, "2.1 Non-hospitals" , grouplabel),
grouplabel= ifelse(zlevel_hospital==1, "2.2 Hospitals" , grouplabel) )
dtasummarylevel<-left_join(dtasummaryx, dtasummaryy,
by = c("country", "round", "month", "year", "group", "grouplabel"))%>%
select(-starts_with("z"))
# By facility managing authority
dtasummaryx<-dtatempx%>%
group_by(country, round, month, year, zpub)%>%
summarize_at(vars(starts_with("x")),
funs(weighted.mean(., weight, na.rm = TRUE)))%>%
ungroup()%>%
mutate(
group="Sector",
grouplabel="",
grouplabel= ifelse(zpub==0, "3.2 Non-public" , grouplabel),
grouplabel= ifelse(zpub==1, "3.1 Public" , grouplabel) )
dtasummaryy<-dtatempy%>%
group_by(country, round, month, year, zpub)%>%
summarize_at(vars(starts_with("obs"), starts_with("yvac"), starts_with("ynum"),
starts_with("staff"), starts_with("vol")),
funs(sum(.*weight, na.rm = TRUE)))%>%
ungroup()%>%
mutate(
group="Sector",
grouplabel="",
grouplabel= ifelse(zpub==0, "3.2 Non-public" , grouplabel),
grouplabel= ifelse(zpub==1, "3.1 Public" , grouplabel) )
dtasummarysector<-left_join(dtasummaryx, dtasummaryy,
by = c("country", "round", "month", "year", "group", "grouplabel"))%>%
select(-starts_with("z"))
Question 6: Can we add a different analysis domain (e.g., detailed facility level) in the summary indicator data?
group_by
, to include the new domain, zlevel as an example# By facility type - detail
dtasummaryx<-dtatempx%>%
group_by(country, round, month, year, zlevel)%>%
summarize_at(vars(starts_with("x")),
funs(weighted.mean(., weight, na.rm = TRUE)))%>%
ungroup()%>%
mutate(
group="Level - detail",
grouplabel=zlevel )
dtasummaryy<-dtatempy%>%
group_by(country, round, month, year, zlevel)%>%
summarize_at(vars(starts_with("obs"), starts_with("yvac"), starts_with("ynum"),
starts_with("staff"), starts_with("vol")),
funs(sum(.*weight, na.rm = TRUE)))%>%
ungroup()%>%
mutate(
group="Level - detail",
grouplabel=zlevel )
dtasummaryleveldetail<-left_join(dtasummaryx, dtasummaryy,
by = c("country", "round", "month", "year", "group", "grouplabel"))%>%
select(-starts_with("z"))
rbind
below# Append all
dim(dtasummaryall)
dim(dtasummarylocation)
dim(dtasummarylevel)
dim(dtasummaryleveldetail)
dim(dtasummarysector)
dtasummary<-rbind(dtasummaryall, dtasummarylocation, dtasummarylevel, dtasummaryleveldetail, dtasummarysector)
END OF MARKDOWN FILE