SPINS RISH harmonization matching

Purpose. This script tracks the steps / presents the code used to identify matched participants for the purposes of RISH harmonization.
Written: 2019-12-20
Last ran: 2019-12-23
Website: http://rpubs.com/navona/RISH_matching

#load libraries
library(dplyr)
library(kableExtra)
library(knitr)
library(reshape2)
library(tableone)
library(MatchIt)
xfun::pkg_load2(c('base64enc', 'htmltools', 'mime'))


#read the three inputs
df <- read.csv(
  '../data/SPINS-MRI_2019-12-22.txt',
  header = FALSE,
  stringsAsFactors = FALSE
) #ls of file system; the file names are read into column V1
df_demo <- read.csv('../data/SPINS_DATA_2019-11-23.csv') #demographic (REDCap) data
df_phantomIDs <- read.csv(
  '../data/human-phantom-ids.csv',
  stringsAsFactors = FALSE
)

#in one pass: drop the rows that name the containing directory, derive the
#subject ID (characters 1-14) and the site (characters 7-9) from the file name,
#and drop rows with 'PHA' in the ID (these are non-human phantoms)
df <- df %>%
  filter(!grepl('/archive/data/SPINS/data/nii/', V1)) %>%
  mutate(
    record_id = substr(V1, 1, 14),
    site = substr(V1, 7, 9)
  ) %>%
  filter(!grepl('PHA', record_id))

#take a look at site (printed before the error-log row is removed)
table(df$site)

#remove error log - follow up
df <- df %>% filter(!grepl('log', record_id))

#count unique IDs
length(unique(df$record_id)) #483, INCLUDING travelling human phantoms -- should have similar when keep only DWI scans

#keep only DWI scans (includes .nii, bvec, bval, and json)
df <- df %>% filter(grepl('DTI', V1))

#make sure all participants have all 4 data types (.nii, bvec, bval, json)
counts <- as.data.frame(table(df$record_id))
min(counts$Freq) #great -- we have none less than 4, meaning no data is missing

#now, for ease of counting, keep only .nii data in df
#fixed = TRUE matches the literal text '.nii.gz'; without it each '.' is a regex
#wildcard that matches any character
df <- df %>% filter(grepl('.nii.gz', V1, fixed = TRUE)) #now, 480 (so 3 missing DWI), but includes human phantoms

#human phantoms have 'P' as the 11th character of the 14-character ID (i.e., 4th last)
#computed once and reused for both the count and the indicator column
is_phantom <- substr(df$record_id, 11, 11) %in% 'P'

#count how many scans from human phantoms
sum(is_phantom) #32

#make a column that indicates if the participant is a human phantom
df$isPhantom <- is_phantom

#remove test subjects -- not real phantoms
#NOTE(review): this matches '998' / '999' anywhere in the ID, not only in the subject-number field -- confirm no real IDs contain these digits
df <- df %>% filter(!grepl('998|999', record_id)) #so, we really have 30 'true' phantoms

#for now, split the human phantoms and study participants up, into separate databases
#subset on the logical column directly, rather than comparing it with the strings 'TRUE' / 'FALSE'
df_phantoms <- df[df$isPhantom, ]
df <- df[!df$isPhantom, ]

Step 1. Munging DWI data (study participants and human phantoms).
First, we summarize the data available in the archive/SPINS/data/nii directory. We have DWI data from 476 participants, including 446 unique study participants, and 30 total human phantom scans (across 5 unique human phantoms). The summary of complete DWI participant data (not including human phantoms) per site is as follows:

#one-row table of per-site counts of study participants with complete DWI data
table_step1 <- t(data.frame(unclass(table(df$site))))
kable_styling(kable(table_step1, row.names = FALSE))

CMH	CMP	MRC	MRP	ZHH	ZHP
137	33	67	71	45	93

#just keep demo vars we care about
demo_vars <- c('record_id', 'redcap_event_name', 'hand_laterality_quotient',
               'demo_sex_birth', 'demo_age_study_entry', 'wtar_std_score',
               'wtarsum_std', 'term_early_withdraw')
df_demo <- df_demo[, demo_vars]

#make a single WTAR variable (note: two WTAR variables from different sites)
#take wtar_std_score where present, otherwise fall back to wtarsum_std
df_demo$wtar_std <- ifelse(is.na(df_demo$wtar_std_score), df_demo$wtarsum_std, df_demo$wtar_std_score)

#remove unneeded WTAR variables
df_demo <- subset(df_demo, select = -c(wtar_std_score, wtarsum_std))

#recode variables for clarity
df_demo$redcap_event_name <- ifelse(df_demo$redcap_event_name == 'case_arm_2', 'SSD', 'HC')
df_demo$demo_sex_birth <- ifelse(df_demo$demo_sex_birth == 1, 'female', 'male')

#withdrawal codes 2 and 8 both mean EXCLUDE; recode them in one pass so the
#column is never compared with a number after it has become character
#(%in% is FALSE for NA, so missing codes are left as NA, as before)
df_demo$term_early_withdraw <- ifelse(df_demo$term_early_withdraw %in% c(2, 8), 'EXCLUDE', df_demo$term_early_withdraw)

#first, remove participants who are not eligible
df_demo <- df_demo %>% filter(!grepl('EXCLUDE', term_early_withdraw)) #55 participants

#remove row if there's an NA value in any variable required for harmonization
#(counts removed when screened one variable at a time: sex 8, age 0, WTAR 4, handedness 1)
required_vars <- c('demo_sex_birth', 'demo_age_study_entry', 'wtar_std', 'hand_laterality_quotient')
df_demo <- df_demo[complete.cases(df_demo[, required_vars]), ]

#remove data from people who are not right handed (where right-handed is a laterality quotient of >=.5)
#df_demo <- df_demo[(df_demo$hand_laterality_quotient >= .5),]

#remove data from patients (as matching on basis of HCs only)
#which() drops rows whose comparison is NA, instead of returning them as all-NA rows
df_demo <- df_demo[which(df_demo$redcap_event_name != 'SSD'), ] #304

#rename variables for clarity
names(df_demo)[names(df_demo) == 'redcap_event_name'] <- 'dx'
names(df_demo)[names(df_demo) == 'demo_sex_birth'] <- 'sex'
names(df_demo)[names(df_demo) == 'demo_age_study_entry'] <- 'age'
names(df_demo)[names(df_demo) == 'hand_laterality_quotient'] <- 'handedness'

#merge with imaging data -- will give final count, with all demo and imaging
#(merge keeps only record_ids present in both data frames)
df <- merge(df, df_demo, by='record_id')

#for clarity, remove unneeded
df <- subset(df, select = -c(term_early_withdraw, V1, isPhantom))

Step 2. Merging DWI data with RISH matching criteria.
Next, for all participants with DWI, we merge demographic data. We exclude participants who (i) did not continuously meet eligibility criteria throughout the SPINS study, (ii) do not have complete data required for harmonization (age, sex, handedness, WTAR), and/or (iii) are not HCs. In total, we have 175 participants who meet these criteria, as follows:

	reference	targets
	CMH	CMP	MRC	MRP	ZHH	ZHP
female	20	10	9	15	9	19
male	26	4	16	17	9	21

Step 3. Selection of human phantom scans.
Recall that the Karayumak paper recommends a minimum of n=16 matched controls (n=20 is the “gold standard”). Here, we see that we still have relatively small numbers of participants – for the purposes of matching – at some sites (namely CMP, MRC, and ZHH), exemplified by an unmatched site total n, stratified by sex, that is not much larger than the desired matched site total n, stratified by sex (i.e., n=8 for the “minimum” matching sample; n=10 for the “gold standard” matching sample). Matching under such conditions may still be possible, but not ideal, i.e., it could result in comparatively large distances between matched subjects.

We have decided to mitigate the danger of a poorly matched sample by matching on the “minimum” recommendation of n=16. Additionally, we will supplement our study sample with available human phantom data, i.e., data from the 3 PIs who were scanned across sites. (Note that the 3 PI scans come from some combination of Aristotle, Bob, Anil, and Miklos, and we cannot use the data from Jessica as she was only scanned at one site (MRC), and did not complete the DWI scan.) Thus, all human phantom data derives from males.

Below, we match human phantom scans on the basis of availability and scan date. Note that there are multiple human phantom scans for the reference site (CMH); when this is also true for a given target site, we include the scans closest in time to the reference site, and/or in the middle of the study. Thus, the human phantom data that we will include is as follows:

#modify the phantom names to include timepoint information
#(first 17 characters of the file name, i.e., the 14-character ID plus 3 more characters, e.g. SPN01_CMH_P001_03)
df_phantoms$timepoint <- substr(df_phantoms$V1, 1, 17)

#merge with the phantoms df taken from the archive -- this will get rid of IDs that don't have diffusion data
#the 17-character timepoint is matched against df_phantomIDs$record_id; the 14-character
#record_id from the archive df comes back as record_id.y
#NOTE(review): merge() sorts its result on the key by default; the hard-coded row numbers below rely on that order
df_phantoms <- merge(df_phantomIDs, df_phantoms,  by.x='record_id', by.y='timepoint')

#remove variables we don't want
df_phantoms <- subset(df_phantoms, select = -c(V1, record_id.y, isPhantom))

#each block below stacks the target-site rows first, then the CMH (reference) rows
#NOTE(review): rows are picked by position -- presumably rows 1-9 are the CMH scans
#(1, 4, 7 = timepoint 01; 2, 5, 8 = timepoint 02; 3, 6, 9 = timepoint 03), which agrees
#with the rendered tables; re-check these indices whenever the input files change

#CMH and CMP (Anil not scanned at CMP -- so Miklos)
phantoms_cmp <- df_phantoms[df_phantoms$site == 'CMP',]
phantoms_cmp <- rbind(phantoms_cmp, df_phantoms[c(3, 6, 9),])

#CMH and MRC
phantoms_mrc <- df_phantoms[c(14, 16, 18, 2, 5, 8),]

#CMH and MRP (Bob didn't get scanned at MRP -- so Miklos)
#NOTE(review): the rendered MRP table lists Bob with a target scan and Miklos as SPN01_MRP_P005_03 -- confirm which PI this note should name
phantoms_mrp <- df_phantoms[df_phantoms$site == 'MRP',]
phantoms_mrp <- rbind(phantoms_mrp, df_phantoms[c(3, 9, 6),])

#CMH and ZHH
phantoms_zhh <- df_phantoms[c(22, 24, 26, 1, 4, 7),]

#CMH and ZHP (Anil didn't get scanned)
phantoms_zhp <- df_phantoms[df_phantoms$site == 'ZHP',] 
phantoms_zhp <- rbind(phantoms_zhp, df_phantoms[c(3, 6, 9),])

#reshape a long phantom df (one row per PI per site) to wide format:
#one row per PI, with the remaining columns repeated once per site (suffixed '.<site>')
phantomsReshape_fn <- function(df) {
  reshape(
    data = df,
    timevar = 'site',
    idvar = 'PI',
    direction = 'wide'
  )
}

#convert every per-site phantom df from long to wide (one row per PI)
phantoms_cmp <- phantoms_cmp %>% phantomsReshape_fn()
phantoms_mrc <- phantoms_mrc %>% phantomsReshape_fn()
phantoms_mrp <- phantoms_mrp %>% phantomsReshape_fn()
phantoms_zhh <- phantoms_zhh %>% phantomsReshape_fn()
phantoms_zhp <- phantoms_zhp %>% phantomsReshape_fn()

#display a reshaped phantom df as a styled table
#columns 1, 4 and 2 of df are shown, in that order, as PI / reference scan / target scan
phantomsTable_fn <- function(df) {
  scan_pairs <- df[, c(1, 4, 2)]
  header <- c("PI", 'reference scan', 'target scan')
  styled <- kable(scan_pairs, row.names = FALSE, align = 'c', col.names = header)
  kable_styling(styled)
}

CMP

phantomsTable_fn(phantoms_cmp)

PI	reference scan	target scan
Aristotle	SPN01_CMH_P001_03	SPN01_CMP_P001_04
Miklos	SPN01_CMH_P002_03	SPN01_CMP_P002_04
Bob	SPN01_CMH_P003_03	SPN01_CMP_P003_04

MRC

phantomsTable_fn(phantoms_mrc)

PI	reference scan	target scan
Aristotle	SPN01_CMH_P001_02	SPN01_MRC_P001_02
Anil	SPN01_CMH_P002_02	SPN01_MRC_P002_02
Bob	SPN01_CMH_P003_02	SPN01_MRC_P003_02

MRP

phantomsTable_fn(phantoms_mrp)

PI	reference scan	target scan
Aristotle	SPN01_CMH_P001_03	SPN01_MRP_P001_03
Bob	SPN01_CMH_P003_03	SPN01_MRP_P003_03
Miklos	SPN01_CMH_P002_03	SPN01_MRP_P005_03

ZHH

phantomsTable_fn(phantoms_zhh)

PI	reference scan	target scan
Aristotle	SPN01_CMH_P001_01	SPN01_ZHH_P001_01
Anil	SPN01_CMH_P002_01	SPN01_ZHH_P002_01
Bob	SPN01_CMH_P003_01	SPN01_ZHH_P003_01

ZHP

phantomsTable_fn(phantoms_zhp)

PI	reference scan	target scan
Aristotle	SPN01_CMH_P001_03	SPN01_ZHP_P001_03
Miklos	SPN01_CMH_P002_03	SPN01_ZHP_P002_03
Bob	SPN01_CMH_P003_03	SPN01_ZHP_P003_03

Step 4. Review study participants’ data before matching.
Here, we review study participants’ data before matching. We see that differences exist in some variables between sites (note p values); this is just a descriptive look at the data for our own understanding of it.

#make a new 0/1 group variable (required by MatchIt package):
#0 = reference site (CMH), 1 = any other site -- numeric, not a factor
is_reference <- df$site == 'CMH'
df$group <- ifelse(is_reference, 0, 1)

#make subsets of df, as is required by MatchIt:
#each target-site subset holds the CMH rows plus that site's rows
df_CMH <- df[is_reference, ]
df_CMP <- df[is_reference | df$site == 'CMP', ]
df_MRC <- df[is_reference | df$site == 'MRC', ]
df_MRP <- df[is_reference | df$site == 'MRP', ]
df_ZHH <- df[is_reference | df$site == 'ZHH', ]
df_ZHP <- df[is_reference | df$site == 'ZHP', ]

#summarize age, sex, handedness, and WTAR, stratified by site
#(sex is treated as a categorical variable)
dfTable_fn <- function(df) {
  covariates <- c('age', 'sex', 'handedness', 'wtar_std')
  CreateTableOne(
    data = df,
    vars = covariates,
    strata = 'site',
    factorVars = 'sex'
  )
}

#build the pre-match summary (CMH vs. target site) for each target site
prematchCMP <- df_CMP %>% dfTable_fn()
prematchMRC <- df_MRC %>% dfTable_fn()
prematchMRP <- df_MRP %>% dfTable_fn()
prematchZHH <- df_ZHH %>% dfTable_fn()
prematchZHP <- df_ZHP %>% dfTable_fn()

#call print() on a tableone object with printToggle=FALSE and noSpaces=TRUE, and hand
#back whatever print() returns, so that the result can later be sent to kable
tableOne_fn <- function(df) {
  print(
    df,
    printToggle = FALSE,
    noSpaces = TRUE
  )
}

#replace each tableone object with its printed form, ready for kable
prematchCMP <- prematchCMP %>% tableOne_fn()
prematchMRC <- prematchMRC %>% tableOne_fn()
prematchMRP <- prematchMRP %>% tableOne_fn()
prematchZHH <- prematchZHH %>% tableOne_fn()
prematchZHP <- prematchZHP %>% tableOne_fn()


#display a printed pre-match table as a centre-aligned, styled kable
prematchTable_fn <- function(df) {
  kable_styling(kable(df, align = 'c'))
}

CMP

prematchTable_fn(prematchCMP)

	CMH	CMP	p
n	46	14
age (mean (SD))	27.17 (8.34)	26.64 (4.63)	0.821
sex = male (%)	26 (56.5)	4 (28.6)	0.127
handedness (mean (SD))	0.70 (0.41)	0.59 (0.59)	0.432
wtar_std (mean (SD))	116.00 (9.07)	110.64 (8.72)	0.056

MRC

prematchTable_fn(prematchMRC)

	CMH	MRC	p
n	46	25
age (mean (SD))	27.17 (8.34)	37.04 (11.14)	<0.001
sex = male (%)	26 (56.5)	16 (64.0)	0.719
handedness (mean (SD))	0.70 (0.41)	0.65 (0.32)	0.605
wtar_std (mean (SD))	116.00 (9.07)	116.28 (9.65)	0.904

MRP

prematchTable_fn(prematchMRP)

	CMH	MRP	p
n	46	32
age (mean (SD))	27.17 (8.34)	33.50 (11.28)	0.006
sex = male (%)	26 (56.5)	17 (53.1)	0.948
handedness (mean (SD))	0.70 (0.41)	0.49 (0.48)	0.039
wtar_std (mean (SD))	116.00 (9.07)	111.84 (10.91)	0.071

ZHH

prematchTable_fn(prematchZHH)

	CMH	ZHH	p
n	46	18
age (mean (SD))	27.17 (8.34)	32.61 (9.41)	0.027
sex = male (%)	26 (56.5)	9 (50.0)	0.848
handedness (mean (SD))	0.70 (0.41)	0.54 (0.70)	0.242
wtar_std (mean (SD))	116.00 (9.07)	107.06 (14.91)	0.005

ZHP

prematchTable_fn(prematchZHP)

	CMH	ZHP	p
n	46	40
age (mean (SD))	27.17 (8.34)	34.90 (10.56)	<0.001
sex = male (%)	26 (56.5)	21 (52.5)	0.876
handedness (mean (SD))	0.70 (0.41)	0.75 (0.43)	0.626
wtar_std (mean (SD))	116.00 (9.07)	114.25 (11.76)	0.439

#subset a df to the rows of one sex
#  df:  data frame with a 'sex' column
#  sex: the value to keep (e.g., 'male' or 'female')
#returns the matching rows of df as a data frame
#which() drops rows where sex is NA (a bare logical index would return them as
#all-NA rows); drop = FALSE keeps a one-column df from collapsing to a vector
dfSex_fn <- function(df, sex){
  df[which(df$sex == sex), , drop = FALSE]
}

#split every site subset by sex: males first ...
df_CMP.m <- dfSex_fn(df = df_CMP, sex = 'male')
df_MRC.m <- dfSex_fn(df = df_MRC, sex = 'male')
df_MRP.m <- dfSex_fn(df = df_MRP, sex = 'male')
df_ZHH.m <- dfSex_fn(df = df_ZHH, sex = 'male')
df_ZHP.m <- dfSex_fn(df = df_ZHP, sex = 'male')

#... then females
df_CMP.f <- dfSex_fn(df = df_CMP, sex = 'female')
df_MRC.f <- dfSex_fn(df = df_MRC, sex = 'female')
df_MRP.f <- dfSex_fn(df = df_MRP, sex = 'female')
df_ZHH.f <- dfSex_fn(df = df_ZHH, sex = 'female')
df_ZHP.f <- dfSex_fn(df = df_ZHP, sex = 'female')

Step 5. Match study participants from each site.
Now, we match study participants from each site. Following the Karayumak paper, we strive for equal sex balance between the matched groups (as we want to avoid sex-bias and privilege easy-to-interpret and consistent sex-constitution) at the cost of not meeting the “gold standard” n=20 sample size. Effectively, this means that we are striving to include 8 males (including 3 male human phantoms) and 8 females in each group.

One exception to the equal sex balance across all sites must be noted: the CMP match actually comprises 7 males (including 3 human phantoms) and 9 females. We figured that slight sex imbalance here is preferable to falling short of the minimum matching sample size. Thus, our matches are as follows:

#display a two-column df of matched pairs as a styled table,
#labelling the columns 'reference scan' and 'target scan'
matchTable_fn <- function(df) {
  pair_header <- c('reference scan', 'target scan')
  kable_styling(kable(df, row.names = FALSE, col.names = pair_header))
}

CMP males

matchTable_fn(CMP_matched_males)

reference scan	target scan
SPN01_CMH_0144	SPN01_CMP_0188
SPN01_CMH_0038	SPN01_CMP_0187
SPN01_CMH_0004	SPN01_CMP_0199
SPN01_CMH_0123	SPN01_CMP_0216

CMP females

reference scan	target scan
SPN01_CMH_0194	SPN01_CMP_0219
SPN01_CMH_0016	SPN01_CMP_0193
SPN01_CMH_0159	SPN01_CMP_0183
SPN01_CMH_0028	SPN01_CMP_0202
SPN01_CMH_0093	SPN01_CMP_0218
SPN01_CMH_0069	SPN01_CMP_0206
SPN01_CMH_0026	SPN01_CMP_0190
SPN01_CMH_0086	SPN01_CMP_0209
SPN01_CMH_0014	SPN01_CMP_0217

MRC males

reference scan	target scan
SPN01_CMH_0008	SPN01_MRC_0058
SPN01_CMH_0135	SPN01_MRC_0036
SPN01_CMH_0091	SPN01_MRC_0043
SPN01_CMH_0015	SPN01_MRC_0071
SPN01_CMH_0123	SPN01_MRC_0016

MRC females

reference scan	target scan
SPN01_CMH_0017	SPN01_MRC_0070
SPN01_CMH_0069	SPN01_MRC_0020
SPN01_CMH_0026	SPN01_MRC_0068
SPN01_CMH_0134	SPN01_MRC_0057
SPN01_CMH_0054	SPN01_MRC_0065
SPN01_CMH_0020	SPN01_MRC_0066
SPN01_CMH_0159	SPN01_MRC_0024
SPN01_CMH_0125	SPN01_MRC_0021

MRP males

reference scan	target scan
SPN01_CMH_0091	SPN01_MRP_0137
SPN01_CMH_0144	SPN01_MRP_0100
SPN01_CMH_0044	SPN01_MRP_0088
SPN01_CMH_0149	SPN01_MRP_0095
SPN01_CMH_0113	SPN01_MRP_0144

MRP females

reference scan	target scan
SPN01_CMH_0016	SPN01_MRP_0122
SPN01_CMH_0086	SPN01_MRP_0123
SPN01_CMH_0069	SPN01_MRP_0076
SPN01_CMH_0007	SPN01_MRP_0082
SPN01_CMH_0023	SPN01_MRP_0139
SPN01_CMH_0026	SPN01_MRP_0127
SPN01_CMH_0005	SPN01_MRP_0081
SPN01_CMH_0125	SPN01_MRP_0156

ZHH males

reference scan	target scan
SPN01_CMH_0119	SPN01_ZHH_0056
SPN01_CMH_0040	SPN01_ZHH_0048
SPN01_CMH_0044	SPN01_ZHH_0029
SPN01_CMH_0031	SPN01_ZHH_0008
SPN01_CMH_0113	SPN01_ZHH_0005

ZHH females

reference scan	target scan
SPN01_CMH_0020	SPN01_ZHH_0011
SPN01_CMH_0028	SPN01_ZHH_0001
SPN01_CMH_0159	SPN01_ZHH_0002
SPN01_CMH_0134	SPN01_ZHH_0023
SPN01_CMH_0086	SPN01_ZHH_0010
SPN01_CMH_0026	SPN01_ZHH_0003
SPN01_CMH_0194	SPN01_ZHH_0060
SPN01_CMH_0054	SPN01_ZHH_0009

ZHP males

reference scan	target scan
SPN01_CMH_0120	SPN01_ZHP_0170
SPN01_CMH_0123	SPN01_ZHP_0120
SPN01_CMH_0091	SPN01_ZHP_0138
SPN01_CMH_0135	SPN01_ZHP_0137
SPN01_CMH_0065	SPN01_ZHP_0167

ZHP females

reference scan	target scan
SPN01_CMH_0017	SPN01_ZHP_0149
SPN01_CMH_0026	SPN01_ZHP_0116
SPN01_CMH_0134	SPN01_ZHP_0142
SPN01_CMH_0069	SPN01_ZHP_0074
SPN01_CMH_0054	SPN01_ZHP_0068
SPN01_CMH_0086	SPN01_ZHP_0141
SPN01_CMH_0109	SPN01_ZHP_0108
SPN01_CMH_0066	SPN01_ZHP_0145

Step 6. Review matched study participants from each site.
Here, we are reviewing the characteristics of the matched SPINS sample. Note that data from the 3 male human phantoms is not represented here: this summary is simply of the study participants included in the matched sample. We see that differences that existed in the unmatched sample summarized in Step 4 (namely differences in age, sex, and WTAR score) have disappeared from the matched sample. We also see relatively stable ‘distance’ calculations between site pairings. We also know a priori that, if data from the 3 male phantoms were represented here, match quality would only improve.

#bind together dfs
#stack the male and female subsets for each target site back into a single df per site
#NOTE(review): the summary function applied next expects a 'distance' column, so the .m / .f
#objects are presumably the matched samples produced by matching code not shown here -- confirm
df_CMP <- rbind(df_CMP.m, df_CMP.f)
df_MRC <- rbind(df_MRC.m, df_MRC.f)
df_MRP <- rbind(df_MRP.m, df_MRP.f)
df_ZHH <- rbind(df_ZHH.m, df_ZHH.f)
df_ZHP <- rbind(df_ZHP.m, df_ZHP.f)


#summarize age, sex, handedness, WTAR, and match distance, stratified by site
#(sex is treated as a categorical variable)
dfTableMatch_fn <- function(df) {
  covariates <- c('age', 'sex', 'handedness', 'wtar_std', 'distance')
  CreateTableOne(
    data = df,
    vars = covariates,
    strata = 'site',
    factorVars = 'sex'
  )
}

#summarize each matched sample and convert the summary to its printed form, so that it
#can be sent to kable
#tableOne_fn (defined above for the pre-match tables) applies the same
#print(..., printToggle=FALSE, noSpaces=TRUE) call, so it is reused here
#note: each df_<site> is overwritten -- it holds the printed summary table from here on
df_CMP <- tableOne_fn(dfTableMatch_fn(df_CMP))
df_MRC <- tableOne_fn(dfTableMatch_fn(df_MRC))
df_MRP <- tableOne_fn(dfTableMatch_fn(df_MRP))
df_ZHH <- tableOne_fn(dfTableMatch_fn(df_ZHH))
df_ZHP <- tableOne_fn(dfTableMatch_fn(df_ZHP))

CMP

kable(df_CMP, align='c') %>%  kable_styling()

	CMH	CMP	p
n	13	13
age (mean (SD))	27.54 (9.27)	26.77 (4.80)	0.793
sex = female (%)	9 (69.2)	9 (69.2)	1.000
handedness (mean (SD))	0.67 (0.52)	0.69 (0.49)	0.939
wtar_std (mean (SD))	113.23 (10.64)	110.23 (8.94)	0.444
distance (mean (SD))	0.33 (0.18)	0.34 (0.19)	0.850

MRC

kable(df_MRC, align='c') %>%  kable_styling()

	CMH	MRC	p
n	13	13
age (mean (SD))	34.85 (10.89)	39.23 (13.57)	0.373
sex = female (%)	8 (61.5)	8 (61.5)	1.000
handedness (mean (SD))	0.73 (0.39)	0.77 (0.16)	0.712
wtar_std (mean (SD))	114.46 (8.73)	112.77 (11.73)	0.680
distance (mean (SD))	0.40 (0.28)	0.53 (0.29)	0.280

MRP

kable(df_MRP, align='c') %>%  kable_styling()

	CMH	MRP	p
n	13	13
age (mean (SD))	29.08 (8.75)	30.69 (10.38)	0.672
sex = female (%)	8 (61.5)	8 (61.5)	1.000
handedness (mean (SD))	0.63 (0.59)	0.69 (0.34)	0.760
wtar_std (mean (SD))	117.69 (6.93)	113.38 (8.43)	0.168
distance (mean (SD))	0.36 (0.25)	0.42 (0.26)	0.556

ZHH

kable(df_ZHH, align='c') %>%  kable_styling()

	CMH	ZHH	p
n	13	13
age (mean (SD))	29.62 (8.78)	29.77 (8.41)	0.964
sex = female (%)	8 (61.5)	8 (61.5)	1.000
handedness (mean (SD))	0.66 (0.52)	0.47 (0.74)	0.457
wtar_std (mean (SD))	114.69 (9.64)	107.08 (14.14)	0.122
distance (mean (SD))	0.28 (0.15)	0.41 (0.31)	0.179

ZHP

kable(df_ZHP, align='c') %>%  kable_styling()

	CMH	ZHP	p
n	13	13
age (mean (SD))	31.85 (12.54)	36.08 (13.34)	0.413
sex = female (%)	8 (61.5)	8 (61.5)	1.000
handedness (mean (SD))	0.59 (0.58)	0.72 (0.52)	0.532
wtar_std (mean (SD))	115.92 (8.89)	115.38 (8.84)	0.878
distance (mean (SD))	0.50 (0.27)	0.57 (0.27)	0.487

Step 7. Combine all matches – study participants and human phantoms – into a single dataframe for download.
Here, the human phantom pairings are added to the participant matches. As desired, we have n=16 matches for each site. At all sites except CMP, we have an equal sex constitution of n=8 males (including 3 human phantoms) and n=8 females. At CMP, we have n=7 males (including 3 human phantoms) and n=9 females.

#stack the matched male pairs from all target sites
#(the *_matched_males / *_matched_females objects are created by code not shown here)
males <- rbind(CMP_matched_males,
      MRC_matched_males,
      MRP_matched_males,
      ZHH_matched_males,
      ZHP_matched_males)
      
#keep only columns 4 and 2 of each reshaped phantom df -- the same positions that
#phantomsTable_fn displays as 'reference scan' and 'target scan'
phantoms_cmp <- phantoms_cmp[,c(4, 2)]
phantoms_mrc <- phantoms_mrc[,c(4, 2)]
phantoms_mrp <- phantoms_mrp[,c(4, 2)]
phantoms_zhh <- phantoms_zhh[,c(4, 2)]
phantoms_zhp <- phantoms_zhp[,c(4, 2)]

#bind phantoms
#mapply(c, ...) concatenates the dfs column-by-column, by position (the column names
#differ between sites, so the columns are not matched by name)
phantoms <- (mapply(c, phantoms_cmp, phantoms_mrc, phantoms_mrp ,  phantoms_zhh, phantoms_zhp))
phantoms <- as.data.frame(phantoms)
#make sure every column is character
#(presumably guards against as.data.frame creating factors -- confirm)
phantoms[] <- lapply(phantoms, as.character)

#females
females <- rbind(CMP_matched_females,
      MRC_matched_females,
      MRP_matched_females,
      ZHH_matched_females,
      ZHP_matched_females)

#bind all together
#again by column position: first column with first column, second with second,
#in the order males, phantoms, females
#NOTE(review): assumes the columns of males / females are character; c() on factor columns may not keep the ID strings -- confirm
df_matchComplete <- mapply(c, males, phantoms, females)

#write.csv
write.csv(df_matchComplete, '../data/df_matchComplete.csv', row.names=FALSE)

#link for download
xfun::embed_file('../data/df_matchComplete.csv')

Download df_matchComplete.csv