library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.0 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(here)
## here() starts at /Users/caoanjie/Desktop/projects/CCRR_kids
# Raw-data directories, one per culture, resolved from the project root by
# here().
US_PATH <- here("data/1_raw_data/US/")
CN_PATH <- here("data/1_raw_data/CN/")
# Destination file for the combined US + CN data set.
MERGED_DATA_PATH <- here("data/2_merge_data/merged_data.csv")
First, we count how many rows are in each raw data file.
# Full paths of every raw CSV file for each culture.
# `pattern` is a regular expression, not a shell glob, so it is anchored on the
# literal ".csv" extension; a glob-style "*.csv" would also match names that
# merely contain "csv" after any character (e.g. "notes_csv.txt").
# US_PATH / CN_PATH already come from here(), so they are passed straight to
# list.files(), and full.names = TRUE prepends the directory for us.
us_files <- list.files(US_PATH, pattern = "\\.csv$", full.names = TRUE)
cn_files <- list.files(CN_PATH, pattern = "\\.csv$", full.names = TRUE)
# Count the data rows in each CSV file.
#   files: character vector of CSV file paths.
# Returns a tibble with one row per file and two columns: `n` (number of rows
# read_csv() parsed from the file) and `file_name` (the path as given).
count_rows_per_file <- function(files) {
  # With no files, map_df() would return a tibble without any columns and the
  # later `filter(n > MIN_ROW)` would fail, so return the empty schema instead.
  if (length(files) == 0) {
    return(tibble(n = integer(), file_name = character()))
  }
  map_df(files, function(file) {
    tibble(n = nrow(read_csv(file)), file_name = file)
  })
}

us_data_RAW <- count_rows_per_file(us_files)
cn_data_RAW <- count_rows_per_file(cn_files)
# TBA: confirm this threshold. Files with MIN_ROW rows or fewer are skipped;
# presumably those are sessions where someone was just trying the task out.
MIN_ROW <- 50

# Read and stack every file that has more than `min_row` rows, then tag all
# rows with the culture they came from.
#   row_counts:    tibble with columns `n` (row count) and `file_name` (path).
#   culture_label: value stored in the `culture` column of the result.
#   min_row:       a file is kept only if it has strictly more rows than this.
# Returns one tibble with the rows of all kept files plus a `culture` column.
read_culture_data <- function(row_counts, culture_label, min_row = MIN_ROW) {
  kept_files <- row_counts %>%
    filter(n > min_row) %>%
    pull(file_name)
  map_df(kept_files, function(file) {
    # `unique` is coerced to character before the files are row-bound;
    # NOTE(review): presumably its guessed type differs between files - confirm.
    read_csv(file) %>%
      mutate(unique = as.character(unique))
  }) %>%
    mutate(culture = culture_label)
}

us_data <- read_culture_data(us_data_RAW, "US")
cn_data <- read_culture_data(cn_data_RAW, "CN")
# Count the raw participants, i.e. the number of distinct `subject` values.
#   df: data frame with a `subject` column.
# Returns a one-row data frame whose column `n` holds that count.
fun.count_s <- function(df) {
  unique_subjects <- distinct(df, subject)
  count(unique_subjects)
}
#fun.count_s(us_data)
fun.count_s(cn_data)
## # A tibble: 1 x 1
## n
## <int>
## 1 11
Merge the US and CN data together.
# Stack the US and CN rows into a single table and save it to disk.
# NOTE: the saved file currently still contains voice info, which is
# identifiable.
merged_data <- list(us_data, cn_data) %>%
  bind_rows()
write_csv(merged_data, MERGED_DATA_PATH)
If the raw data are not available, start by reading the merged data instead.
#merged_data %>%
# filter(culture == "CN")
# Run the demographics-extraction script; presumably it defines extract_demog(),
# which the commented-out calls below refer to. Sourcing it also attaches
# jsonlite, as the start-up message shows.
source(here("preprocessing/extract/extract_demog.R"))
##
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
## buggy, does not work, issue with the jsonstring!
#extract_demog(merged_data, "US")
#extract_demog(merged_data, "CN")
This step is very slow! It needs to be optimized or moved into a separate Rmd file.
# Keep only the CN rows that have audio data, reduced to the three columns
# used for exporting the audio.
ad_only <- merged_data %>%
  filter(culture == "CN", !is.na(audio_data)) %>%
  select(culture, subject, audio_data)
library(base64enc)
# Decode one base64-encoded recording and write the bytes to "<path_name>.wav".
#   audio_code: base64 string to decode.
#   path_name:  output path without the ".wav" extension.
# Returns NULL invisibly; called for its side effect of writing the file.
# The decoded bytes are written as-is; nothing checks that they are valid WAV.
write_to_audio <- function(audio_code, path_name) {
  audio <- base64decode(audio_code)
  wav_con <- file(paste0(path_name, ".wav"), "wb")
  # Close the connection even when writeBin() errors, so it is never leaked.
  on.exit(close(wav_con), add = TRUE)
  writeBin(audio, wav_con)
  invisible(NULL)
}
# Write every recording in `df` to its own .wav file in the project root.
#   df: data frame with columns `subject` and `audio_data` (base64 strings).
# Each file is named "<audio_id><subject>.wav", where audio_id numbers the
# recordings within a subject in row order. Returns NULL invisibly.
convert_all_audio_file <- function(df) {
  df_id <- df %>%
    group_by(subject) %>%
    mutate(audio_id = row_number()) %>%
    ungroup()
  # seq_len() makes a 0-row input a no-op; `1:nrow(df)` would iterate over
  # c(1, 0) and fail on the first out-of-range index.
  for (i in seq_len(nrow(df_id))) {
    path_name <- here(paste0(df_id$audio_id[[i]], df_id$subject[[i]]))
    write_to_audio(df_id$audio_data[[i]], path_name)
  }
  invisible(NULL)
}
convert_all_audio_file(ad_only)
# Run the RMTS task script (presumably the source of get_RMTS_main()), then
# apply get_RMTS_main() to the merged data; its result is printed below.
source(here("preprocessing/task/RMTS.R"))
get_RMTS_main(merged_data)
## # A tibble: 48 x 7
## subject culture task_name task_info trial_info resp_type resp
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 SS1623330510838 US RMTS RMTS RMTS choice_match 1
## 2 SS1623330510838 US RMTS RMTS RMTS choice_match 1
## 3 SS1623330510838 US RMTS RMTS RMTS choice_match 1
## 4 SS1623330510838 US RMTS RMTS RMTS choice_match 1
## 5 SS1625633849073 CN RMTS RMTS RMTS choice_match 0
## 6 SS1625633849073 CN RMTS RMTS RMTS choice_match 0
## 7 SS1625633849073 CN RMTS RMTS RMTS choice_match 0
## 8 SS1625633849073 CN RMTS RMTS RMTS choice_match 0
## 9 SS1626246567529 CN RMTS RMTS RMTS choice_match 0
## 10 SS1626246567529 CN RMTS RMTS RMTS choice_match 0
## # … with 38 more rows
# Run the conformity-preference (CP) task script (presumably the source of
# get_CP_main()), then apply get_CP_main() to the merged data; its result is
# printed below.
source(here("preprocessing/task/conformity_preference.R"))
get_CP_main(merged_data)
## # A tibble: 15 x 7
## subject culture task_name task_info trial_info resp_type resp
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 SS1619115057503 US CP CP CP choice 0
## 2 SS1620060991140 US CP CP CP choice 1
## 3 SS1622239825527 US CP CP CP choice 1
## 4 SS1623330510838 US CP CP CP choice 0
## 5 SS1625633849073 CN CP CP CP choice 1
## 6 SS1626246567529 CN CP CP CP choice 1
## 7 SS1626248030076 CN CP CP CP choice 0
## 8 SS1626270942346 CN CP CP CP choice 1
## 9 SS1626318133574 CN CP CP CP choice 1
## 10 SS1626318241504 CN CP CP CP choice 0
## 11 SS1626205419081 CN CP CP CP choice 0
## 12 SS1626350796915 CN CP CP CP choice 1
## 13 SS1626436894129 CN CP CP CP choice 1
## 14 SS1626472640251 CN CP CP CP choice 1
## 15 SS1626483615684 CN CP CP CP choice 0