This is an R Markdown document demonstrating how to write a basic R function that handles multiple datasets without repeating the code.
library(tidyverse) # Load the tidyverse packages (install once beforehand with install.packages("tidyverse"))
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
G1combined <- read_csv("G1combined_anonymous.csv") # Read the first dataset from CSV into a tibble
## Rows: 47551 Columns: 74
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (27): input_kb_2.keys, input_kb_2.rt, Flanker_resp.keys, input_kb.keys, ...
## dbl (46): RandomID, ExpGroup, guess_input_begin, Flanker_resp.corr, Flanker_...
## lgl (1): Intuitive
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(G1combined) # View the first 6 rows of the dataset (head() shows 6 rows by default)
## # A tibble: 6 × 74
## RandomID ExpGroup input_kb_2.keys input_kb_2.rt guess_input_begin
## <dbl> <dbl> <chr> <chr> <dbl>
## 1 71028 1 <NA> <NA> NA
## 2 71028 1 <NA> <NA> NA
## 3 71028 1 <NA> <NA> NA
## 4 71028 1 <NA> <NA> NA
## 5 71028 1 <NA> <NA> NA
## 6 71028 1 <NA> <NA> NA
## # … with 69 more variables: Flanker_resp.keys <chr>, Flanker_resp.corr <dbl>,
## # Flanker_resp.rt <dbl>, input_kb.keys <chr>, guess_input_end <dbl>,
## # flanker_pause_resp.keys <chr>, Flanker_Loop.thisRepN <dbl>,
## # Flanker_Loop.thisTrialN <dbl>, Flanker_Loop.thisN <dbl>,
## # Flanker_Loop.thisIndex <dbl>, Flanker_Loop.ran <dbl>, text <chr>,
## # flanker_corr <chr>, congruency <chr>, input_kb.rt <chr>,
## # flanker_pause_resp.rt <dbl>, Num_Instr_resp.keys <chr>, …
The process below takes about 12 lines of code. If you have four datasets, you need to repeat this section three more times (at least 36 additional lines, about 48 in total including the original piece). You also need to change a parameter, i.e., the data name, in each of the three copies, so your chance of making a copy-and-paste mistake at least triples.
# Manual, single-dataset version: add three derived columns to G1combined
G1combined1 <- mutate(
  G1combined,
  # RT taken from the first task whose response is marked correct (.corr == 1)
  RTcorr = case_when(Flanker_resp.corr == 1 ~ Flanker_resp.rt,
                     Num_resp.corr == 1 ~ Num_resp.rt,
                     Sci_resp.corr == 1 ~ Sci_resp.rt),
  # RT taken from the first task whose response is marked incorrect (.corr == 0)
  RTincorr = case_when(Flanker_resp.corr == 0 ~ Flanker_resp.rt,
                       Num_resp.corr == 0 ~ Num_resp.rt,
                       Sci_resp.corr == 0 ~ Sci_resp.rt),
  # Task label, decided by which RT column is filled in on the row
  Task = case_when(!is.na(Flanker_resp.rt) ~ "Flanker",
                   !is.na(Num_resp.rt) ~ "Numerical",
                   !is.na(Sci_resp.rt) ~ "Science")
)
# Structure
# NameOfTheFunction <- function(df) {
#   The same code as for a single dataset, with the dataset name replaced by df
# }
# Add accuracy-split reaction times and a task label to a data frame.
#
# Args:
#   df: A data frame with the columns Flanker_resp.corr, Flanker_resp.rt,
#       Num_resp.corr, Num_resp.rt, Sci_resp.corr and Sci_resp.rt.
#
# Returns:
#   df with the columns RTcorr, RTincorr and Task added (in that order).
#   Rows that match none of the conditions get NA in the respective column.
ReturnRTonAcc <- function(df) {
  mutate(
    df,
    # RT from the first task whose response is marked correct (.corr == 1)
    RTcorr = case_when(Flanker_resp.corr == 1 ~ Flanker_resp.rt,
                       Num_resp.corr == 1 ~ Num_resp.rt,
                       Sci_resp.corr == 1 ~ Sci_resp.rt),
    # RT from the first task whose response is marked incorrect (.corr == 0)
    RTincorr = case_when(Flanker_resp.corr == 0 ~ Flanker_resp.rt,
                         Num_resp.corr == 0 ~ Num_resp.rt,
                         Sci_resp.corr == 0 ~ Sci_resp.rt),
    # Task label, decided by which RT column is filled in on the row
    Task = case_when(!is.na(Flanker_resp.rt) ~ "Flanker",
                     !is.na(Num_resp.rt) ~ "Numerical",
                     !is.na(Sci_resp.rt) ~ "Science")
  )
}
# Check the function's output: run it on the first dataset and show the last
# four columns (the last original column plus the three added ones).
# last_col() avoids hard-coding positions that differ between datasets.
ReturnRTonAcc(G1combined) %>%
  select(last_col(offset = 3):last_col())
## # A tibble: 47,551 × 4
## remind_id_kb.rt RTcorr RTincorr Task
## <dbl> <dbl> <dbl> <chr>
## 1 NA NA NA <NA>
## 2 NA NA NA <NA>
## 3 NA NA NA <NA>
## 4 NA NA NA <NA>
## 5 NA NA NA <NA>
## 6 NA NA NA <NA>
## 7 NA NA NA <NA>
## 8 NA NA NA <NA>
## 9 NA NA NA <NA>
## 10 NA NA NA <NA>
## # … with 47,541 more rows
G2combined <- read_csv("G2combined_anonymous.csv") # Read a second dataset; it has fewer columns (60 vs. 74) but includes the columns the function uses
## Rows: 49238 Columns: 60── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): Flanker_resp.keys, flanker_pause_resp.keys, text, flanker_corr, co...
## dbl (44): RandomID, ExpGroup, Flanker_resp.corr, Flanker_resp.rt, Flanker_Lo...
## lgl (1): Intuitive
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Don't forget to assign the result to a new name when running the function
G2combined1 <- ReturnRTonAcc(G2combined)
# Show the last four columns; last_col() works whatever the column count is
G2combined1 %>%
  select(last_col(offset = 3):last_col())
## # A tibble: 49,238 × 4
## remind_id_kb.rt RTcorr RTincorr Task
## <dbl> <dbl> <dbl> <chr>
## 1 NA NA NA <NA>
## 2 NA NA NA <NA>
## 3 NA NA NA <NA>
## 4 NA NA NA <NA>
## 5 NA NA NA <NA>
## 6 NA NA NA <NA>
## 7 NA NA NA <NA>
## 8 NA NA NA <NA>
## 9 NA NA NA <NA>
## 10 NA 1.45 NA Flanker
## # … with 49,228 more rows
Let’s try another one. This time we want to calculate the average reaction time (RT) for each participant in each task.
# Average RT per participant and task (manual, single-dataset version)
G1averageRT <- G1combined1 %>%
  group_by(RandomID, Task) %>% # One group per participant ID and task type
  summarise(
    meanRTcorr = mean(RTcorr, na.rm = TRUE), # Mean RT on correct trials
    # Mean RT on incorrect trials: must average RTincorr, not RTcorr
    meanRTincorr = mean(RTincorr, na.rm = TRUE),
    # Same grouping as summarise()'s default, stated explicitly to avoid the message
    .groups = "drop_last"
  ) %>%
  filter(!is.na(Task)) # Drop the rows that belong to no task
## `summarise()` has grouped output by 'RandomID'. You can override using the
## `.groups` argument.
head(G1averageRT) # Preview the first 6 rows
## # A tibble: 6 × 4
## # Groups: RandomID [2]
## RandomID Task meanRTcorr meanRTincorr
## <dbl> <chr> <dbl> <dbl>
## 1 123 Flanker 0.523 0.523
## 2 123 Numerical 2.49 2.49
## 3 123 Science 4.18 4.18
## 4 10621 Flanker 0.595 0.595
## 5 10621 Numerical 1.89 1.89
## 6 10621 Science 2.52 2.52
Write and run the new function
# Average the correct-trial and incorrect-trial RTs per participant and task.
#
# Args:
#   df: A data frame with the columns RandomID, Task, RTcorr and RTincorr
#       (e.g. the output of ReturnRTonAcc()).
#
# Returns:
#   A tibble with one row per RandomID/Task combination and the columns
#   meanRTcorr and meanRTincorr. Rows whose Task is NA are removed. The
#   result stays grouped by RandomID, as with summarise()'s default.
GetAverageMean <- function(df) {
  df %>%
    group_by(RandomID, Task) %>%
    summarise(
      meanRTcorr = mean(RTcorr, na.rm = TRUE),
      # Must average RTincorr here; averaging RTcorr again would just
      # duplicate meanRTcorr
      meanRTincorr = mean(RTincorr, na.rm = TRUE),
      # Same grouping as the default, stated explicitly to avoid the message
      .groups = "drop_last"
    ) %>%
    filter(!is.na(Task))
}
G2averageRT <- GetAverageMean(G2combined1) # Summarise the second dataset with the new function
## `summarise()` has grouped output by 'RandomID'. You can override using the
## `.groups` argument.
head(G2averageRT) # Check the first 6 rows.
## # A tibble: 6 × 4
## # Groups: RandomID [2]
## RandomID Task meanRTcorr meanRTincorr
## <dbl> <chr> <dbl> <dbl>
## 1 123 Flanker 0.589 0.589
## 2 123 Numerical 2.81 2.81
## 3 123 Science 3.73 3.73
## 4 10236 Flanker 0.694 0.694
## 5 10236 Numerical 5.76 5.76
## 6 10236 Science 4.33 4.33