load the packages

library(tidyverse)
library(here)
library(janitor)

read the data

# Read the AIBQ csv file, then let janitor rewrite the column names into
# snake_case so they are easy to type in later chunks.
AIBQ <- clean_names(read_csv("Desktop/AIBQ.csv"))

list the variable names

# Print every column name so we can see which variables are available.
colnames(AIBQ)

##  [1] "event_index"                     "utc_timestamp"                  
##  [3] "utc_date"                        "local_timestamp"                
##  [5] "local_timezone"                  "local_date"                     
##  [7] "experiment_id"                   "experiment_version"             
##  [9] "tree_node_key"                   "repeat_key"                     
## [11] "schedule_id"                     "participant_public_id"          
## [13] "participant_private_id"          "participant_starting_group"     
## [15] "participant_status"              "participant_completion_code"    
## [17] "participant_external_session_id" "participant_device_type"        
## [19] "participant_device"              "participant_os"                 
## [21] "participant_browser"             "participant_monitor_size"       
## [23] "participant_viewport_size"       "checkpoint"                     
## [25] "task_name"                       "task_version"                   
## [27] "branch_7nct"                     "branch_a6vc"                    
## [29] "branch_3m6c"                     "order_5b7d"                     
## [31] "spreadsheet_name"                "spreadsheet_row"                
## [33] "trial_number"                    "screen_number"                  
## [35] "screen_name"                     "zone_name"                      
## [37] "zone_type"                       "reaction_time"                  
## [39] "reaction_onset"                  "response"                       
## [41] "attempt"                         "correct"                        
## [43] "incorrect"                       "dishonest"                      
## [45] "x_coordinate"                    "y_coordinate"                   
## [47] "timed_out"                       "randomise_blocks"               
## [49] "randomise_trials"                "display"                        
## [51] "answer"                          "scenario"                       
## [53] "question"                        "part1"                          
## [55] "part2"                           "part3"                          
## [57] "choice1"                         "choice3"                        
## [59] "choice2"                         "scenario_alone"

Make the data problem smaller

This chunk selects just the variables related to participant, response, and display (are these the most important?), then filters for a single participant, drops rows that contain NA, and adds a row_id to help with the pivot_wider.

# Narrow the data down to one participant's answered rows.
# Keeps only the participant id, response and display columns, drops any row
# with an NA in those columns, and numbers the remaining rows.
subset <- AIBQ %>%
  select(participant_private_id, response, display) %>%
  # the id column is numeric (<dbl>), so compare against a number instead of
  # relying on implicit number-to-string coercion
  filter(participant_private_id == 1468527) %>%
  drop_na() %>% # drops rows containing NA
  mutate(row_id = row_number()) # adds unique identifiers

Add a question type column

This chunk adds a new column with the question type as a repeating sequence.

# Labels for the four responses recorded per display set: three ratings
# followed by one preference choice.
qtype <- c("rating_1", "rating_2", "rating_3", "preference")

# The labelling below is only valid when the rows come in complete groups of
# length(qtype); stop early if that is not the case.
stopifnot(nrow(subset) %% length(qtype) == 0)

new_qtype <- subset %>%
  # repeat the labels as often as the data requires rather than hard-coding 10
  mutate(rating_pref = rep(qtype, times = n() / length(qtype)))

head(new_qtype)

## # A tibble: 6 x 5
##   participant_privat… response                   display row_id rating_pref
##                 <dbl> <chr>                      <chr>    <int> <chr>      
## 1             1468527 3                          set1         1 rating_1   
## 2             1468527 3                          set1         2 rating_2   
## 3             1468527 5                          set1         3 rating_3   
## 4             1468527 I just need more time and… set1         4 preference 
## 5             1468527 5                          set2         5 rating_1   
## 6             1468527 3                          set2         6 rating_2

filter on question type

Now that you have a column with the question type, you can use filter to create separate data frames for the preference data and the rating data.

# Rating rows: every row whose label is something other than "preference".
ratings <- filter(new_qtype, rating_pref != "preference")

# Preference rows: only the rows labelled "preference".
preference <- filter(new_qtype, rating_pref == "preference")

head(ratings)

## # A tibble: 6 x 5
##   participant_private_id response display row_id rating_pref
##                    <dbl> <chr>    <chr>    <int> <chr>      
## 1                1468527 3        set1         1 rating_1   
## 2                1468527 3        set1         2 rating_2   
## 3                1468527 5        set1         3 rating_3   
## 4                1468527 5        set2         5 rating_1   
## 5                1468527 3        set2         6 rating_2   
## 6                1468527 1        set2         7 rating_3

# Show the first few preference rows.
preference %>% head()

## # A tibble: 6 x 5
##   participant_priva… response                    display row_id rating_pref
##                <dbl> <chr>                       <chr>    <int> <chr>      
## 1            1468527 I just need more time and … set1         4 preference 
## 2            1468527 They don't know if i can c… set2         8 preference 
## 3            1468527 The tests were really diff… set3        12 preference 
## 4            1468527 They didn't think my prese… set4        16 preference 
## 5            1468527 Lots of people want to go … set5        20 preference 
## 6            1468527 I've eaten too many sweets… set6        24 preference

Note, though, that you don't yet have any information about which options ratings 1, 2, and 3 refer to, or how those ratings correspond to the participant's preference choice.

cassandra

Jen Richmond

02/07/2020