Purpose

  • The purpose of this script is to clean the raw Qualtrics dataset, document key decisions made, and generate two clean datasets for further analysis.

Loading the anonymized dataset for cleaning

Data

The raw data was downloaded from Qualtrics on June 17, 2024.

  • The raw dataset is stored in Github directory ~WHO_UN/pilot_data

  • The payment dataset is stored in Github directory ~WHO_UN/pilot_data

  • The working file path is ~WHO_UN/pilot_data/

# Read the raw Qualtrics export and the payment records.
# Both files live under `~WHO_UN/pilot_data`.
data <- read_csv(file = "./pilot_data/data_june17.csv")

data_payment <- read_csv(file = "./pilot_data/payment_data.csv")

Displaying the raw variable names

# List every column name in the raw export
colnames(data)
##   [1] "StartDate"                        "EndDate"                         
##   [3] "Status"                           "IPAddress"                       
##   [5] "Progress"                         "Duration (in seconds)"           
##   [7] "Finished"                         "RecordedDate"                    
##   [9] "ResponseId"                       "RecipientLastName"               
##  [11] "RecipientFirstName"               "RecipientEmail"                  
##  [13] "ExternalReference"                "LocationLatitude"                
##  [15] "LocationLongitude"                "DistributionChannel"             
##  [17] "UserLanguage"                     "consent100_timer_First Click"    
##  [19] "consent100_timer_Last Click"      "consent100_timer_Page Submit"    
##  [21] "consent100_timer_Click Count"     "consent100"                      
##  [23] "consent200_timer_First Click"     "consent200_timer_Last Click"     
##  [25] "consent200_timer_Page Submit"     "consent200_timer_Click Count"    
##  [27] "consent200"                       "screening_parent"                
##  [29] "screening_new_child"              "screening_num_child"             
##  [31] "screening_child_old"              "screening_child_vac"             
##  [33] "screening_country"                "screening_country_4_TEXT"        
##  [35] "screening_city"                   "malaria_chance_reg"              
##  [37] "malaria_chance"                   "vax_aware"                       
##  [39] "vax_aware_country"                "vax_avail"                       
##  [41] "vax_knowledge"                    "vax_status"                      
##  [43] "vax_doses"                        "vax_place"                       
##  [45] "vax_distance"                     "vax_time"                        
##  [47] "vax_info_diff"                    "vax_appoint_diff"                
##  [49] "vax_appoint_why"                  "vax_cost"                        
##  [51] "vax_concern"                      "vax_reason"                      
##  [53] "info_text_timer_First Click"      "info_text_timer_Last Click"      
##  [55] "info_text_timer_Page Submit"      "info_text_timer_Click Count"     
##  [57] "vax_likelihood"                   "region"                          
##  [59] "district"                         "clinic"                          
##  [61] "transport"                        "transport_cost_1"                
##  [63] "transport_time_7"                 "transport_time_8"                
##  [65] "novax_primary"                    "novax_primary_11_TEXT"           
##  [67] "novax_safe"                       "novax_info"                      
##  [69] "novax_info_where"                 "novax_info_what"                 
##  [71] "novax_natural"                    "novax_change"                    
##  [73] "novax_change_13_TEXT"             "trust_med_advice1_ti_First Click"
##  [75] "trust_med_advice1_ti_Last Click"  "trust_med_advice1_ti_Page Submit"
##  [77] "trust_med_advice1_ti_Click Count" "trust_med_advice1_1"             
##  [79] "trust_med_advice1_2"              "trust_med_advice1_3"             
##  [81] "trust_med_advice1_4"              "trust_med_advice1_5"             
##  [83] "trust_med_advice1_6"              "trust_med_advice1_7"             
##  [85] "trust_med_advice2_ti_First Click" "trust_med_advice2_ti_Last Click" 
##  [87] "trust_med_advice2_ti_Page Submit" "trust_med_advice2_ti_Click Count"
##  [89] "trust_med_advice2_1"              "trust_med_advice2_2"             
##  [91] "trust_med_advice2_3"              "trust_med_advice2_4"             
##  [93] "trust_med_advice2_5"              "trust_worried_1"                 
##  [95] "trust_worried_2"                  "trust_worried_3"                 
##  [97] "trust_worried_other"              "gender"                          
##  [99] "education"                        "employment"                      
## [101] "income"                           "religion"                        
## [103] "trad_healer_visit"                "risk_taking_1"                   
## [105] "altruism_1"                       "fair_punishment_1"               
## [107] "join_group"                       "phone_number"                    
## [109] "assist_text100_timer_First Click" "assist_text100_timer_Last Click" 
## [111] "assist_text100_timer_Page Submit" "assist_text100_timer_Click Count"
## [113] "assist_text100_open"              "assist_text200_timer_First Click"
## [115] "assist_text200_timer_Last Click"  "assist_text200_timer_Page Submit"
## [117] "assist_text200_timer_Click Count" "assist_text200_open"             
## [119] "C1i_4"                            "C1i_5"                           
## [121] "C1ii_1"                           "C1ii_4"                          
## [123] "D1i_4"                            "D1i_5"                           
## [125] "D1ii_1"                           "D1ii_2"                          
## [127] "C2i_4"                            "C2i_5"                           
## [129] "C2ii_1"                           "C2ii_4"                          
## [131] "D2i_4"                            "D2i_5"                           
## [133] "D2ii_1"                           "D2ii_2"                          
## [135] "reasonable_budget100_1"           "reasonable_budget200_1"          
## [137] "statement_test#1_1"               "statement_test#1_2"              
## [139] "statement_test#1_3"               "survey_difficulty"               
## [141] "value_condition"                  "msg_condition"

Create arm variable

Variable arm_coded is created to identify the treatment arm of the survey. It takes on two values: treatment_100 or treatment_200. If C1i_4 and C1i_5 or D1i_4 and D1i_5 are not empty, the respondent is assigned arm_coded == treatment_100. If C2i_4 and C2i_5 or D2i_4 and D2i_5 are not empty, the respondent is assigned arm_coded == treatment_200. Respondents who answered neither set of questions are left with an empty arm_coded.

# Derive the treatment arm from which allocation questions were answered.
# A row is "treatment_100" when both C1i_4 and C1i_5, or both D1i_4 and D1i_5,
# are non-missing; "treatment_200" uses the C2i_*/D2i_* pairs in the same way.
# case_when() takes the first matching rule; rows matching neither rule get
# an empty string.
data <- data %>%
  mutate(
    arm_coded = case_when(
      (!is.na(C1i_4) & !is.na(C1i_5)) |
        (!is.na(D1i_4) & !is.na(D1i_5)) ~ "treatment_100",
      (!is.na(C2i_4) & !is.na(C2i_5)) |
        (!is.na(D2i_4) & !is.na(D2i_5)) ~ "treatment_200",
      TRUE ~ ""
    )
  )


# Number of responses in each value of arm_coded, rendered as an HTML table
data %>%
  count(arm_coded) %>%
  kable(format = "html", col.names = c("Arm", "N")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Arm N
61
treatment_100 10
treatment_200 9

Variable acc_coded is created to capture the accountability question. acc_coded == both if C1ii_1 and C1ii_4 or C2ii_1 and C2ii_4 are not empty. acc_coded == some if D1ii_1 and D1ii_2 or D2ii_1 and D2ii_2 are not empty. Otherwise acc_coded is left empty.

# Derive the accountability condition from which follow-up questions were
# answered: "both" when a C*ii pair (C1ii_1 & C1ii_4, or C2ii_1 & C2ii_4) is
# non-missing, "some" when a D*ii pair (D1ii_1 & D1ii_2, or D2ii_1 & D2ii_2)
# is non-missing. Rows matching neither rule get an empty string.
data <- data %>%
  mutate(
    acc_coded = case_when(
      (!is.na(C1ii_1) & !is.na(C1ii_4)) |
        (!is.na(C2ii_1) & !is.na(C2ii_4)) ~ "both",
      (!is.na(D1ii_1) & !is.na(D1ii_2)) |
        (!is.na(D2ii_1) & !is.na(D2ii_2)) ~ "some",
      TRUE ~ ""
    )
  )

# Number of responses in each value of acc_coded, rendered as an HTML table
data %>%
  count(acc_coded) %>%
  kable(format = "html", col.names = c("Accountability", "N")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Accountability N
61
both 9
some 10
# Cross-tabulate accountability condition against treatment arm
data %>%
  group_by(acc_coded) %>%
  count(arm_coded) %>%
  kable(format = "html", col.names = c("Accountability", "Arm", "N")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Accountability Arm N
61
both treatment_100 5
both treatment_200 4
some treatment_100 5
some treatment_200 5
  • Creating payment variables: payself_first_coded, payother_first_coded, payself_final_coded, payother_final_coded
# Collapse the condition-specific allocation columns into one column each.
# coalesce() takes the first non-missing value across the listed columns,
# in the order given.
data <- data %>%
  mutate(
    payself_first_coded  = coalesce(C1i_4, D1i_4, C2i_4, D2i_4),
    payother_first_coded = coalesce(C1i_5, D1i_5, C2i_5, D2i_5),
    payself_final_coded  = coalesce(C1ii_1, D1ii_1, C2ii_1, D2ii_1),
    payother_final_coded = coalesce(C1ii_4, D1ii_2, C2ii_4, D2ii_2)
  )
  • Double-checking that the self and other amounts add up to 100 or 200
# Sanity check for the treatment_100 arm: summarise the self + other totals
# for the first and the final allocation.
data %>%
  filter(arm_coded == "treatment_100") %>%
  mutate(
    total_first = payself_first_coded + payother_first_coded,
    total_final = payself_final_coded + payother_final_coded
  ) %>%
  select(total_first, total_final) %>%
  summary() %>%
  kable(format = "html", col.names = c("Total First", "Total Final")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Total First Total Final
Min. :100 Min. :100
1st Qu.:100 1st Qu.:100
Median :100 Median :100
Mean :100 Mean :100
3rd Qu.:100 3rd Qu.:100
Max. :100 Max. :100
# Sanity check for the treatment_200 arm: summarise the self + other totals
# for the first and the final allocation.
data %>%
  filter(arm_coded == "treatment_200") %>%
  mutate(
    total_first = payself_first_coded + payother_first_coded,
    total_final = payself_final_coded + payother_final_coded
  ) %>%
  select(total_first, total_final) %>%
  summary() %>%
  kable(format = "html", col.names = c("Total First", "Total Final")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Total First Total Final
Min. :200 Min. :200
1st Qu.:200 1st Qu.:200
Median :200 Median :200
Mean :200 Mean :200
3rd Qu.:200 3rd Qu.:200
Max. :200 Max. :200
  • Creating difference variables: payself_diff, payother_diff (final allocation minus first allocation)
# Change between the first and the final allocation (final minus first)
data <- data %>%
  mutate(
    payself_diff  = payself_final_coded - payself_first_coded,
    payother_diff = payother_final_coded - payother_first_coded
  )

Create assist_text variable

  • Variables assist_text_first_click_coded and assist_text_last_click_coded are created to combine the first-click and last-click times for both groups
# Merge the arm-specific assistance-text page timers into single columns.
# coalesce() takes the assist_text100 timer when present and otherwise falls
# back to the assist_text200 timer (presumably each respondent saw only one of
# the two pages -- confirm against the survey flow).
data <- data %>%
  mutate(
    assist_text_first_click_coded = coalesce(
      `assist_text100_timer_First Click`,
      `assist_text200_timer_First Click`
    ),
    # Fix: the fallback previously repeated the assist_text100 column, so
    # respondents with only an assist_text200 timer were left NA (70 NAs for
    # last click vs 61 for first click in the rendered summary). Any summary
    # output rendered before this fix must be regenerated.
    assist_text_last_click_coded = coalesce(
      `assist_text100_timer_Last Click`,
      `assist_text200_timer_Last Click`
    )
  )

# Summary statistics for the combined assistance-text click timers
data %>%
  select(assist_text_first_click_coded, assist_text_last_click_coded) %>%
  summary() %>%
  kable(
    format = "html",
    col.names = c(
      "Assist Text First Click in sec",
      "Assist Text Last Click in sec"
    )
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Assist Text First Click in sec Assist Text Last Click in sec
Min. : 1.486 Min. : 8.704
1st Qu.: 8.472 1st Qu.:26.649
Median :22.005 Median :57.654
Mean :19.544 Mean :53.364
3rd Qu.:26.626 3rd Qu.:75.235
Max. :42.822 Max. :95.026
NA’s :61 NA’s :70
  • Combine reasonable budget responses
# Combine the two arm-specific "reasonable budget" answers into one column:
# take reasonable_budget100_1 when present, otherwise reasonable_budget200_1.
data <- data %>%
  mutate(
    reasonable_budget_coded = coalesce(
      reasonable_budget100_1,
      reasonable_budget200_1
    )
  )

# Frequency of each reported reasonable-budget value (NA = no answer)
data %>%
  count(reasonable_budget_coded) %>%
  kable(format = "html", col.names = c("Reasonable Budget", "N")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Reasonable Budget N
10 1
20 3
30 1
50 6
90 1
100 2
210 1
220 1
250 1
300 2
NA 61

Merging payment status to main data

In the data_payment dataset, variable payment_status == SUCCESSFUL if the payment was successful. This variable is merged into the main dataset using ResponseId as the key.

# Keep only the join key and the payment outcome from the payment records
data_payment <- data_payment %>%
  select(ResponseId, payment_status)

# Attach payment_status to every survey response; responses with no matching
# payment record get NA.
data <- data %>%
  left_join(data_payment, by = "ResponseId")


# Number of responses per payment status (NA = no payment record matched)
data %>%
  count(payment_status) %>%
  kable(format = "html", col.names = c("Payment Status", "N")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Payment Status N
SUCCESSFUL 20
NA 60
# Payment status broken down by treatment arm
data %>%
  group_by(arm_coded) %>%
  count(payment_status) %>%
  kable(format = "html", col.names = c("Arm", "Payment Status", "N")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Arm Payment Status N
SUCCESSFUL 1
NA 60
treatment_100 SUCCESSFUL 10
treatment_200 SUCCESSFUL 9

Export clean data

# Write the full `data` frame (all raw columns plus the derived *_coded,
# *_diff and payment_status columns) to the pilot_data folder.
# NOTE(review): the raw column list includes IPAddress, RecipientEmail,
# LocationLatitude/LocationLongitude and phone_number -- confirm these are
# blank/anonymized before this file is committed or shared.
# NOTE(review): the Purpose section mentions two clean datasets; only this
# one export is visible here -- confirm whether a second export is expected.
write_csv(data, "./pilot_data/clean_data.csv")