####WORKSPACE SETTING####
# Attach every package used by the analysis while silencing start-up banners.
# The calls are grouped with braces rather than c(): c() turns the (normally
# invisible) return values of library() into a visible character vector, which
# gets printed as a long list of attached packages.
suppressPackageStartupMessages({
  library(dplyr)
  library(magrittr)
  library(psych)
  library(mice)
  library(psychTools)
  library(careless)
  library(eulerr)
  library(ggplot2)
  library(patchwork)
  library(ComplexUpset)
})
## Warning: il pacchetto 'psychTools' è stato creato con R versione 4.4.3
## Warning: il pacchetto 'careless' è stato creato con R versione 4.4.2
## Warning: il pacchetto 'eulerr' è stato creato con R versione 4.4.3
## Warning: il pacchetto 'ggplot2' è stato creato con R versione 4.4.3
## Warning: il pacchetto 'patchwork' è stato creato con R versione 4.4.3
## Warning: il pacchetto 'ComplexUpset' è stato creato con R versione 4.4.3
## [1] "dplyr" "stats" "graphics" "grDevices" "utils"
## [6] "datasets" "methods" "base" "magrittr" "dplyr"
## [11] "stats" "graphics" "grDevices" "utils" "datasets"
## [16] "methods" "base" "psych" "magrittr" "dplyr"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base" "mice" "psych" "magrittr"
## [31] "dplyr" "stats" "graphics" "grDevices" "utils"
## [36] "datasets" "methods" "base" "psychTools" "mice"
## [41] "psych" "magrittr" "dplyr" "stats" "graphics"
## [46] "grDevices" "utils" "datasets" "methods" "base"
## [51] "careless" "psychTools" "mice" "psych" "magrittr"
## [56] "dplyr" "stats" "graphics" "grDevices" "utils"
## [61] "datasets" "methods" "base" "eulerr" "careless"
## [66] "psychTools" "mice" "psych" "magrittr" "dplyr"
## [71] "stats" "graphics" "grDevices" "utils" "datasets"
## [76] "methods" "base" "ggplot2" "eulerr" "careless"
## [81] "psychTools" "mice" "psych" "magrittr" "dplyr"
## [86] "stats" "graphics" "grDevices" "utils" "datasets"
## [91] "methods" "base" "patchwork" "ggplot2" "eulerr"
## [96] "careless" "psychTools" "mice" "psych" "magrittr"
## [101] "dplyr" "stats" "graphics" "grDevices" "utils"
## [106] "datasets" "methods" "base" "ComplexUpset" "patchwork"
## [111] "ggplot2" "eulerr" "careless" "psychTools" "mice"
## [116] "psych" "magrittr" "dplyr" "stats" "graphics"
## [121] "grDevices" "utils" "datasets" "methods" "base"
Setting the working directory to the folder where this R file is stored (resolved dynamically for reproducibility; this only works in RStudio)
# Point the working directory at the folder of the script currently open in
# RStudio so that the relative data path resolves. The rstudioapi call fails
# outside an RStudio session, so it is guarded: in that case the working
# directory is left unchanged and the user is told to set it manually.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
} else {
  message("RStudio not available: working directory left unchanged (",
          getwd(), ").")
}
#### DATA INPUT ####
# Load the raw questionnaire export (path is relative to the working directory).
bilingual1 <- read.csv(file = "data/data.csv")
#### DATA CLEANING ####
Keeping only those that finished the questionnaire
# Keep only rows with FINISHED equal to 1. which() is used so that a missing
# FINISHED value drops the row instead of producing an all-NA row, which is
# what plain logical indexing does when the index contains NA.
bilingual1 <- bilingual1[which(bilingual1$FINISHED == 1), ]
Removing wrong IDs using regex
# Retain rows whose PR01_01 value is exactly 24 hexadecimal characters;
# anything else is treated as a malformed ID and dropped.
bilingual1 <- bilingual1[
  grepl(pattern = "^[A-Fa-f0-9]{24}$", x = bilingual1$PR01_01),
]
Creating a dataset with only asaq answers
# Subset the columns whose names contain "P10" or "P20" (the ASAQ answers),
# keeping the P10 columns first and the P20 columns after them.
asaq <- select(bilingual1, contains("P10"), contains("P20"))
Calculating number of NAs (108 NAs per participant are expected due to random assignment)
# Per participant: count the NAs, express them relative to half the number of
# ASAQ columns (the amount expected to be NA by design) and subtract that
# baseline, so 0 means "no unexpected missing answers". The rounded
# proportions are tabulated and drawn as a bar chart.
asaq %>%
  is.na() %>%
  rowSums() %>%
  {. / (ncol(asaq) / 2) - 1} %>%
  round(digits = 2) %>%
  table() %>%
  barplot(main = "proportion of missing answers per participant")
COMMENT: the missingness pattern is NOT MAR. Most people clearly gave up answering when moving from one section to the next. These participants will be excluded from the data set, as decided in the pre-registration, and no further analysis will be performed on this MNAR pattern, as it should not bias the data.
We will allow up to 10% of missing answers.
# Flag participants whose share of unexpected missing answers exceeds 10%.
# Half of the ASAQ columns are expected to be NA for every participant
# (random assignment), so that baseline is divided out and subtracted first.
# The comparison is strict (>) because up to and including 10% of missing
# answers is allowed. rowSums(is.na(.)) replaces a row-wise apply() over the
# data frame; the result keeps the row names as vector names.
exclusion_flag_missing <-
  (rowSums(is.na(asaq)) / (ncol(asaq) / 2) - 1) > 0.1
Bogus items are marked with the suffixes _46 to _54 (items _46 to _51 are reverse-scored).
Plotting the bogus-item scores
# Compute one bogus-item score per participant and plot its distribution.
bogus_scores <- asaq %>%
  # Reverse-key items _46 to _51 (8 - x flips a 1..7 answer).
  mutate(
    across(
      matches("_(46|47|48|49|50|51)$"),
      \(x) 8 - x
    )
  ) %>%
  # Keep only the bogus items (_46 to _54).
  select(matches("_(46|47|48|49|50|51|52|53|54)$")) %>%
  # Row-wise total; unanswered items are ignored, i.e. they contribute 0.
  rowSums(na.rm = TRUE) %T>%
  # Tee pipe: draw the bar chart but still return the scores for assignment.
  {barplot(table(.), main = "Bogus items scoring")}
Decision on the bogus-item cut-off: answers range from 1 to 7 and there are 18 bogus items. If we accept both 6 and 7 as valid answers, the lowest acceptable score is 6*18=108.
Saving this exclusion criterion
# Flag participants whose bogus-item score falls below the cut-off.
# 6 * 18 = 108 is the lowest score still considered acceptable, so the
# comparison is strict: a score of exactly 108 is NOT flagged.
exclusion_flag_bogus <- bogus_scores < 6 * 18
removing bogus items from asaq
# Drop every column ending in _46 ... _54 (the bogus items) so that only the
# substantive ASAQ items remain.
asaq <- select(asaq, -matches("_(46|47|48|49|50|51|52|53|54)$"))
The watch time of the video and the time spent on the questionnaire page will be evaluated.
The video was 20s long, so all times shorter than 20s will be discarded
# Flag participants whose TIME004 value (presumably the time in seconds spent
# on the video page; confirm against the survey export) is shorter than the
# 20 s video. Strict comparison: exactly 20 s is long enough to have watched
# the whole video and is therefore not flagged.
exclusion_flag_time1 <- bilingual1$TIME004 < 20
There were 108 items on this page. We will admit those that spent on average at least 2s per item, so 216 seconds on the page as a whole.
# Flag participants whose TIME005 value (presumably the time in seconds spent
# on the 108-item page; confirm against the survey export) is below
# 2 s * 108 items = 216 s. Strict comparison: "at least 216 s" is admitted,
# so exactly 216 s is not flagged.
exclusion_flag_time2 <- bilingual1$TIME005 < 2 * 108
# Euler diagram of the row indices flagged by each exclusion criterion, drawn
# inside the set of all participants.
# The "participants" set must contain one index per ROW of bilingual1:
# length() of a data frame is its number of columns, so nrow() is used here.
list(
  missing = which(unname(exclusion_flag_missing)),
  bogus = which(unname(exclusion_flag_bogus)),
  time1 = which(exclusion_flag_time1),
  time2 = which(exclusion_flag_time2),
  participants = seq_len(nrow(bilingual1))
) %>%
  euler() %>%
  plot()
# UpSet plot of the four exclusion flags: one logical column per criterion,
# one row per participant, showing how the criteria overlap.
upset(
  tibble(
    missing = unname(exclusion_flag_missing),
    bogus = unname(exclusion_flag_bogus),
    time1 = exclusion_flag_time1,
    time2 = exclusion_flag_time2
  ),
  intersect = c("missing", "bogus", "time1", "time2")
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ComplexUpset package.
## Please report the issue at
## <https://github.com/krassowski/complex-upset/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
COMMENT: the bogus-item criterion flagged every careless participant that the other methods detected! Removing the flagged participants
# A participant is excluded as soon as any single criterion flags them:
# fold the four logical vectors together with element-wise OR.
exclusion_criteria <- Reduce(
  `|`,
  list(
    exclusion_flag_missing,
    exclusion_flag_bogus,
    exclusion_flag_time1,
    exclusion_flag_time2
  )
)
# Keep only the ASAQ rows of participants that were not flagged.
asaq <- asaq[!exclusion_criteria, ]
Below are careful participants
# Store the IDs (PR01_01) of the participants that passed every check, then
# print them.
valid_paricipants <- bilingual1$PR01_01[!exclusion_criteria]
print(valid_paricipants)
## [1] "65a3f461bfa22dde98c10436" "5a5dc26facc75b00017a8374"
## [3] "5b6f2884c727a90001529ef0" "5c846aba6a4b9b0016c854f5"
## [5] "5f0da6848c9769018bb99f27" "5c36448c2e98540001f7dc4f"
## [7] "5db43b6a2f45e7000bb7ab5c" "5fe328a0a53c57f645dc0b19"
## [9] "60525afd42b105aeeb9124b0" "5c609a727557640001f516c5"
## [11] "5e7dba4f1d6961436474c8f2" "608abc6251feb3ddc3b2e01d"
## [13] "5fde11a92cd5d051ebacc251" "613df323177af3b16b144281"
## [15] "545d0b09fdf99b7f9fca2261" "5e81b4ab1fa05c00097c8c9d"
## [17] "6278f2c1db1a5b8b12b94a21" "5f317f269c42ef04587aa7a0"
## [19] "67665f7b0329bd3e30515ad2" "66f0765bb7f6c2ae6ef721ee"
## [21] "651ac44e976df13fd9976888" "5e81fd2e6f674202efe1ccb9"
## [23] "597f461fbcae9f00014ceacd" "6010381382846a0ae97c00c5"
## [25] "5f0ef9c9ff62e3018320db31" "5f21ff288900cb4709dd49aa"
## [27] "65c4e6a306be5633a2a6561a" "59ca5f505e015800019778d0"
## [29] "67b8f459f36ac348b98d6cad" "5f20678a817321134bd74855"
## [31] "5f53ee2e1b73b474acb3adf8" "5bf2f728bd9f150001f6cc42"
## [33] "56c65a5a305eaa0005f2a634" "60585ff1bd867c3aab48cef1"
## [35] "5d8b11e9849bf40016055f0f" "6102f00212f0ce63ba98bcf3"
## [37] "5dce3d1403c2bf070671c87d" "5a9ddcabdbdb470001ef032d"
## [39] "6026a3507e666237c315a0b0" "5fe230ee29598a57972db630"
## [41] "6097ec1a8c0b4df21cbb9024"
Below are careless participants
# Store the IDs (PR01_01) of the participants flagged by at least one
# exclusion criterion, then print them.
careless_participants <- bilingual1$PR01_01[exclusion_criteria]
print(careless_participants)
## [1] "66d31a1b0515fe747830bab9" "5afcbf18fabc8900018854d9"
## [3] "609a3eb31cacec1cad4b9448" "6315faf8288fc830449ffd3d"
## [5] "66d035e529e80315f590ecce" "615b1603b2c14aef69aea329"
## [7] "5bad01517e61b90001d36181" "5e4c511d056b9c0ad9b6fd24"
## [9] "568db47ae3ef9e000ca0dcdd" "59961863004c4f0001cc864a"
## [11] "5ea824f580b9ef228f589930" "63d1125c54e8148f4cd30a65"
## [13] "668ebf594982c57c983cd910" "5eca46c6c1e2c601290bab15"
## [15] "62c428077a100bb125ebb569" "5f3287b83428cb05746e2214"
## [17] "5a0a057c80acd80001047392"