Importing data UKB data

Import the phenotypes that were extracted from the large UKB file (awk) ($PHENO/Full_Phenotype_Data_250917_With_MHQ_TABBED.txt)

UKBpheno <- fread(file = "data_raw/2019_02_29_BC/EDcases_pheno.txt",
                  header = TRUE,
                  data.table = FALSE)
dim(UKBpheno)
[1] 502618     17
# Recode as factor
UKBpheno$Gender <- factor(UKBpheno$Gender, levels = c(0, 1), labels = c("Female", "Male"))
UKBfactor <-c("Centre")
UKBpheno[UKBfactor] <- lapply(UKBpheno[UKBfactor], factor)

# Numeric columns
# Pregnancy and Menopause are numeric for recoding
UKBnumeric <-c("IID", "Age", "Height", "Weight",
               "BMI", "WC", "HC",
               "BFPC", "FM", "FFM",
               "SES", "Pregnancy", "Menopause")

# Calculate waist-to-hip ratio (WHR)
UKBpheno$WHR <- UKBpheno$WC/UKBpheno$HC

# Check structure
str(UKBpheno)
'data.frame':   502618 obs. of  18 variables:
 $ IID                   : int  1000015 1000027 1000039 1000040 1000053 1000064 1000071 1000088 1000096 1000109 ...
 $ Gender                : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 2 1 1 2 1 ...
 $ Age                   : int  57 49 48 64 43 56 46 54 40 66 ...
 $ Height                : num  157 177 149 172 174 ...
 $ Weight                : num  75 77.3 54.3 78.3 94.6 93.3 60.6 61.5 105 78.5 ...
 $ BMI                   : num  30.4 24.7 24.5 26.5 31.2 29.8 25.2 23.4 31.7 29.5 ...
 $ WC                    : num  92 91 76 90 104 96 79 71 104 94 ...
 $ HC                    : num  109 101 95 102 108 ...
 $ BFPC                  : num  42.6 40.6 30.3 20.8 31.8 25 35.6 28.5 27.6 42.9 ...
 $ FM                    : num  31.9 31.4 16.5 16.3 30.1 23.3 21.6 17.5 29 33.7 ...
 $ FFM                   : num  43.1 46 37.9 62 64.5 70 39 44 76 44.8 ...
 $ SES                   : num  -4.057 -3.1261 -1.6993 -4.6224 -0.0044 ...
 $ Tobacco_current_orig  : int  0 1 0 0 0 0 0 0 0 0 ...
 $ Alcohol_frequency_orig: int  3 3 5 1 2 3 3 2 3 1 ...
 $ Pregnancy             : int  0 0 0 NA NA NA 0 0 NA 0 ...
 $ Menopause             : int  1 0 0 NA NA NA 0 3 NA 1 ...
 $ Centre                : Factor w/ 22 levels "10003","11001",..: 12 19 21 12 5 20 7 12 1 12 ...
 $ WHR                   : num  0.844 0.901 0.8 0.882 0.963 ...

Recode UKB variables

Gender: Female == 0, Male == 1

Menopause: Split by females and males

# Empty column
UKBpheno["Menopause_new"] <- NA

# Male & NA
UKBpheno <- within(UKBpheno, Menopause_new[Gender == "Male" & is.na(Menopause)] <- 0)
# Female & NA
UKBpheno <- within(UKBpheno, Menopause_new[Gender == "Female" & is.na(Menopause)] <- 1)
# Female & Yes
UKBpheno <- within(UKBpheno, Menopause_new[Gender == "Female" & Menopause == 1] <- 2)
# Female & Hysterectomy
UKBpheno <- within(UKBpheno, Menopause_new[Gender == "Female" & Menopause == 2] <- 3)
# Female & Not sure
UKBpheno <- within(UKBpheno, Menopause_new[Gender == "Female" & Menopause == 3] <- 4)
# Female & Prefer not to answer
UKBpheno <- within(UKBpheno, Menopause_new[Gender == "Female" & Menopause == -3] <- 5)
# Female & No
UKBpheno <- within(UKBpheno, Menopause_new[Gender == "Female" & Menopause == 0] <- 6)
# Recode as factor
UKBpheno$Menopause_new <- as.factor(UKBpheno$Menopause_new)
# Delete Menopause
UKBpheno$Menopause <- NULL

summary(UKBpheno$Menopause_new)
     0      1      2      3      4      5      6   NA's 
229131    475 165411  31171  11732    535  64081     82 
# OLD
#      0      1      2      3      4      5      6   NA's
# 177552    128 127330  23826   8896    183  47825      2

Pregnancy

# Empty column
UKBpheno["Pregnancy_no_NA"] <- NA
# Male & NA
UKBpheno <- within(UKBpheno, Pregnancy_no_NA[Gender == "Male" & is.na(Pregnancy)] <- 0)
# Female & NA
UKBpheno <- within(UKBpheno, Pregnancy_no_NA[Gender == "Female" & is.na(Pregnancy)] <- 1)
# Female & No
UKBpheno <- within(UKBpheno, Pregnancy_no_NA[Gender == "Female" & Pregnancy == 0] <- 2)
# Female & Yes
UKBpheno <- within(UKBpheno, Pregnancy_no_NA[Gender == "Female" & Pregnancy == 1] <- 3)
# Female & Not sure
UKBpheno <- within(UKBpheno, Pregnancy_no_NA[Gender == "Female" & Pregnancy == 2] <- 4)
# Recode as factor
UKBpheno$Pregnancy_no_NA <- as.factor(UKBpheno$Pregnancy_no_NA)
# Delte old Pregnancy column
UKBpheno$Pregnancy <- NULL

summary(UKBpheno$Pregnancy_no_NA)
     0      1      2      3      4   NA's 
229131    842 272191    150    222     82 
# OLD
#     0      1      2      3      4   NA's
# 177552    209 207720    105    154      2

Recode alcohol frequency as ordered factor

UKBpheno$Alcohol_frequency_temp <- UKBpheno$Alcohol_frequency_orig

# Prefer not to answer as -3 -> NA
UKBpheno <- within(UKBpheno, Alcohol_frequency_temp[Alcohol_frequency_temp == "-3"] <- NA)
# Ordered factor
UKBpheno$Alcohol_frequency <- factor(UKBpheno$Alcohol_frequency_temp,
                            levels = c("6", "5", "4", "3", "2", "1"),
                            labels = c("Never", "Special occasions only", "One to three times a month",
                                       "Once or twice a week", "Three or four times a week", "Daily or almost daily"),
                            ordered = TRUE)

UKBpheno$Alcohol_frequency_temp <- NULL

summary(UKBpheno$Alcohol_frequency)
                     Never     Special occasions only 
                     40649                      58013 
One to three times a month       Once or twice a week 
                     55860                     129298 
Three or four times a week      Daily or almost daily 
                    115446                     101775 
                      NA's 
                      1577 
ggplot(data = subset(UKBpheno, !is.na(Alcohol_frequency)),
       aes(x = Alcohol_frequency)) +
  geom_histogram(stat = "count") +
  labs(y = "Frequency",
       title = "Alcohol intake frequency (self-report)",
       color = "black") +
  theme(panel.grid.major.x = element_line(size = 0.5,
                                        linetype = 'dashed',
                                        colour = "gray"),
        axis.title.y = element_blank(),
        axis.text.x = element_text(colour="black", size = 12),
        axis.text.y = element_text(colour="black", size = 12),
        axis.ticks.x = element_blank(),
        axis.ticks.y = element_blank(),
        panel.background = element_blank()) +
  scale_y_continuous(labels = scales::comma) +
  coord_flip()
Warning: Ignoring unknown parameters: binwidth, bins, pad

Recode current tobacco smoking as ordered factor

UKBpheno$Tobacco_current_temp <- UKBpheno$Tobacco_current_orig

# Prefer not to answer as -3 -> NA
UKBpheno <- within(UKBpheno, Tobacco_current_temp[Tobacco_current_temp == "-3"] <- NA)
# Ordered factor
UKBpheno$Tobacco_current <- factor(UKBpheno$Tobacco_current_temp,
                            levels = c("0", "1", "2"),
                            labels = c("No", "Only occasionally", "Yes, on most or all days"),
                            ordered = TRUE)

UKBpheno$Tobacco_current_temp <- NULL

summary(UKBpheno$Tobacco_current)
                      No        Only occasionally Yes, on most or all days 
                  448244                    39244                    13735 
                    NA's 
                    1395 
ggplot(data = subset(UKBpheno, !is.na(Tobacco_current)),
       aes(x = Tobacco_current)) +
  geom_histogram(stat = "count") +
  labs(y = "Frequency",
       title = "Current tobacco use (self-report)") +
  theme(panel.grid.major.x = element_line(size = 0.5,
                                        linetype = 'dashed',
                                        colour = "gray"),
        axis.title.y = element_blank(),
        axis.text.x = element_text(colour="black", size = 12),
        axis.text.y = element_text(colour="black", size = 12),
        axis.ticks.x = element_blank(),
        axis.ticks.y = element_blank(),
        panel.background = element_blank()) +
  scale_y_continuous(labels = scales::comma) +
  coord_flip()
Warning: Ignoring unknown parameters: binwidth, bins, pad

Check number of NAs per column

colSums(is.na(UKBpheno))
                   IID                 Gender                    Age 
                     0                     75                     75 
                Height                 Weight                    BMI 
                  2614                  10204                  10212 
                    WC                     HC                   BFPC 
                  2235                   2294                  10484 
                    FM                    FFM                    SES 
                 11050                  10252                    698 
  Tobacco_current_orig Alcohol_frequency_orig                 Centre 
                   966                    972                     75 
                   WHR          Menopause_new        Pregnancy_no_NA 
                  2340                     82                     82 
     Alcohol_frequency        Tobacco_current 
                  1577                   1395 

Define columns for complete cases

cols_cc <-c("Gender", "Age",
                   "Height", "Weight", "BMI", "WC", "HC",
                   "BFPC", "FM", "FFM",
                   "SES",
                   "Tobacco_current_orig", "Alcohol_frequency_orig",
                   "Centre", "WHR",
                   "Menopause_new", "Pregnancy_no_NA")
cols_cc
 [1] "Gender"                 "Age"                   
 [3] "Height"                 "Weight"                
 [5] "BMI"                    "WC"                    
 [7] "HC"                     "BFPC"                  
 [9] "FM"                     "FFM"                   
[11] "SES"                    "Tobacco_current_orig"  
[13] "Alcohol_frequency_orig" "Centre"                
[15] "WHR"                    "Menopause_new"         
[17] "Pregnancy_no_NA"       

Calculate measures adjusted for BMI and Height

# Calculate WHR adjusted for BMI
WHRadjBMImod <- lm(WHR ~ BMI, data=UKBpheno, na.action=na.exclude)
UKBpheno$WHRadjBMI <- resid(WHRadjBMImod)
rm(WHRadjBMImod)
summary(UKBpheno$WHRadjBMI)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
 -0.624  -0.060   0.003   0.000   0.060   1.290   10318 
WCadjBMImod <- lm(WC ~ BMI, data=UKBpheno, na.action=na.exclude)
UKBpheno$WCadjBMI <- resid(WCadjBMImod)
rm(WCadjBMImod)
summary(UKBpheno$WCadjBMI)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
-56.923  -5.723   0.049   0.000   5.763  73.307   10289 
HCadjBMImod <- lm(HC ~ BMI, data=UKBpheno, na.action=na.exclude)
UKBpheno$HCadjBMI <- resid(HCadjBMImod)
rm(HCadjBMImod)
summary(UKBpheno$HCadjBMI)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
-57.685  -2.972   0.001   0.000   2.974  77.647   10282 
WHRadjBFPCmod <- lm(WHR ~ BFPC, data=UKBpheno, na.action=na.exclude)
UKBpheno$WHRadjBFPC <- resid(WHRadjBFPCmod)
rm(WHRadjBFPCmod)
summary(UKBpheno$WHRadjBFPC)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
 -0.695  -0.065   0.001   0.000   0.062   1.241   10590 
WCadjBFPCmod <- lm(WC ~ BFPC, data=UKBpheno, na.action=na.exclude)
UKBpheno$WCadjBFPC <- resid(WCadjBFPCmod)
rm(WCadjBFPCmod)
summary(UKBpheno$WCadjBFPC)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
-63.829 -10.388  -0.043   0.000   8.985 105.660   10561 
HCadjBFPCmod <- lm(HC ~ BFPC, data=UKBpheno, na.action=na.exclude)
UKBpheno$HCadjBFPC <- resid(HCadjBFPCmod)
rm(HCadjBFPCmod)
summary(UKBpheno$HCadjBFPC)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
-57.614  -5.411  -0.329   0.000   4.732  81.121   10554 
FFMadjHeightmod <- lm(FFM ~ Height, data=UKBpheno, na.action=na.exclude)
UKBpheno$FFMadjHeight <- resid(FFMadjHeightmod)
rm(FFMadjHeightmod)
summary(UKBpheno$FFMadjHeight)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
-38.394  -4.656  -0.350   0.000   4.255  38.836   10505 
BFPCadjHeightmod <- lm(BFPC ~ Height, data=UKBpheno, na.action=na.exclude)
UKBpheno$BFPCadjHeight <- resid(BFPCadjHeightmod)
rm(BFPCadjHeightmod)
summary(UKBpheno$BFPCadjHeight)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
-35.614  -4.998  -0.120   0.000   4.958  36.276   10734 

Import additional phenotype data from UKB that has several NAs

UKBpheno_add <- fread(file = "data_raw/2019_02_29_BC/EDcases_pheno_additional.txt", header = TRUE,
                  data.table = FALSE)


# Recode as factor
UKB_addfactor <-c("Region", "Income",
                  "Contraceptive_ever", "HRT", "Breastfed",
                  "BipolarInitialQ")
UKBpheno_add[UKB_addfactor] <- lapply(UKBpheno_add[UKB_addfactor], factor)

# Numeric columns
UKB_add_numeric <-c("IID", "Impedance_wb",
               "FM_trunk","FFM_trunk", "BFPC_trunk",
               "Birth_weight", "VAT",
               "BMC_wb", "BMD_total",
               "Menarche_age_at")

Ethnicity

UKBpheno_add$Ethnicity<-factor(with(UKBpheno_add, ifelse(is.na(Ethnicity) | Ethnicity < 0, NA,
                               ifelse(!is.na(Ethnicity) & Ethnicity > 1000 & Ethnicity < 1999, "European",
                               ifelse(!is.na(Ethnicity) & Ethnicity > 2000 & Ethnicity < 2999, "Mixed",
                               ifelse(!is.na(Ethnicity) & Ethnicity > 3000 & Ethnicity < 3999, "Asian",
                               ifelse(!is.na(Ethnicity) & Ethnicity > 4000 & Ethnicity < 4999, "African",
                               ifelse(!is.na(Ethnicity) & Ethnicity == 5, "Chinese",
                               ifelse(!is.na(Ethnicity) & Ethnicity == 6, "Other", NA)))))))),
                       levels=c("European", "African", "Asian", "Chinese", "Mixed", "Other"))

summary(UKBpheno_add$Ethnicity)
European  African    Asian  Chinese    Mixed    Other     NA's 
  472161     8035     9839     1574     2909     4559     3541 
str(UKBpheno_add)
'data.frame':   502618 obs. of  17 variables:
 $ IID               : int  1000015 1000027 1000039 1000040 1000053 1000064 1000071 1000088 1000096 1000109 ...
 $ BFPC_trunk        : num  40.3 40.8 25 21.7 33.9 26.8 32.2 23.9 29.7 41.1 ...
 $ FM_trunk          : num  16.3 17.9 7.2 9.8 17.7 14.2 10.6 8 17.4 17.7 ...
 $ FFM_trunk         : num  24.1 25.9 21.6 35.4 34.5 38.8 22.2 25.5 41.1 25.3 ...
 $ Impedance_wb      : int  653 761 671 491 569 477 720 623 499 653 ...
 $ Ethnicity         : Factor w/ 6 levels "European","African",..: 1 2 1 1 1 1 1 1 1 1 ...
 $ Region            : Factor w/ 16 levels "1","2","3","4",..: 5 5 5 6 10 5 5 6 5 5 ...
 $ Income            : Factor w/ 7 levels "-3","-1","1",..: 4 5 4 5 5 5 1 5 NA 4 ...
 $ Birth_weight      : num  2.83 NA 3.18 NA 3.37 2.95 NA NA 3.12 2.72 ...
 $ Menarche_age_at   : int  13 14 12 NA NA NA 16 13 NA -1 ...
 $ Contraceptive_ever: Factor w/ 4 levels "-3","-1","0",..: 4 4 4 NA NA NA 4 4 NA 3 ...
 $ HRT               : Factor w/ 4 levels "-3","-1","0",..: 4 3 3 NA NA NA 3 4 NA 3 ...
 $ Breastfed         : Factor w/ 4 levels "-3","-1","0",..: 2 2 4 4 3 4 3 2 4 3 ...
 $ VAT               : int  NA NA NA NA NA NA NA NA NA NA ...
 $ BMC_wb            : int  NA NA NA NA NA NA NA NA NA NA ...
 $ BMD_total         : num  NA NA NA NA NA NA NA NA NA NA ...
 $ BipolarInitialQ   : Factor w/ 2 levels "1","2": NA NA NA NA NA NA NA NA NA NA ...
colSums(is.na(UKBpheno_add))
               IID         BFPC_trunk           FM_trunk 
                 0              10506              10533 
         FFM_trunk       Impedance_wb          Ethnicity 
             10617              10263               3541 
            Region             Income       Birth_weight 
              5126               6092             225606 
   Menarche_age_at Contraceptive_ever                HRT 
            229681             229684             229684 
         Breastfed                VAT             BMC_wb 
               973             497509             497448 
         BMD_total    BipolarInitialQ 
            497448             501003 

Merge UKBpheno data and additional UKB data

nrow(UKBpheno)
[1] 502618
UKBpheno_merged <- merge(UKBpheno, UKBpheno_add, all.x = TRUE, sort = FALSE)
dim(UKBpheno_merged)
[1] 502618     44

Import MHQ

Import Mental Health Questionnaire (MHQ) data

MHQ <- fread(file = "data_raw/2019_02_29_BC/EDcases_MHQ.txt", header = TRUE,
                  data.table = FALSE)
dim(MHQ)
[1] 157358     63
# Rename ID column
colnames(MHQ)[colnames(MHQ)=="f.eid"] <- "IID"
# Add MHQ column binary, Yes == 1, MHQ answered
MHQ$MHQ <- 1

# Extract columns
SRmdxcols <- c("SRSchizophrenia", "SRPsychosisOther", "SRDepression",
               "SRManiaBIP", "SRGADandOthers", "SRPanicAttacks",
               "SRAgoraphobia", "SRSocPhobia", "SROtherPhobia",
               "SROCD", "SRPersonalityDisorder", "SRAnorexiaNervosa",
               "SRBulimiaNervosa", "SRBingeEating", "SRASD", "SRADHD",
               "SRPNTA")

# Create new binary column for self-reported cancer
MHQ$SRAnyMDX <- apply(MHQ[,SRmdxcols] == 1, 1, any)

# Recode as factor
MHQ$SRAnyMDX <- factor(MHQ$SRAnyMDX, labels = c(0,1))

summary(MHQ$SRAnyMDX)
     0      1 
107259  50099 
# Recode as factor
UKBfactor <-c("Migrant.Status", "Highest.Qualification", 
              "Smoker",
              "Longstanding.Illness", "Diabetes", "Cancer", "CVD", "Respiratory",
              "SRSocPhobia", "SRSchizophrenia", "SRPsychosisOther", "SRPsychosisAny",
              "SRPersonalityDisorder", "SROtherPhobia",
              "SRPanicAttacks", "SROCD", "SRManiaBIP", "SRDepression", "SRMood",
              "SRBulimiaNervosa", "SRBingeEating", "SRASD", "SRGADandOthers", "SRAnorexiaNervosa",
              "SREatingDisorderAny", "SRAgoraphobia", "SRAnxietyAny", "SRADHD",
              "SmithDepression",
              "PHQ9.No.Info", "PHQ9.Screen", "PHQ9.Items",
              "GAD.Ever", "GAD.Current",
              "Depressed.Ever", "Depressed.Ever.Severe", "Recurrent.Depression", "Single.Depression",
              "SmithBipolar",
              "Cannabis.Ever", "Cannabis.Daily", "Addiction.Ever.SelfReport", "Addiction.Ever",
              "Substance.Addiction.Ever", "Alcohol.Dependence.Ever", "Addiction.Current",
              "Alcohol.Use.Disorder",
              "Trauma.Childhood", "Trauma.Adult", "Trauma.Catastrophic", "PTSD",
              "Self.Harm.Ever", "Self.Harm.Suicide.Attempt", "NoSRConditions", "MHQ")
MHQ[UKBfactor] <- lapply(MHQ[UKBfactor], factor)

# Numeric columns
UKBnumeric <-c("IID", "Age.At.MHQ", "Neuroticism", "AUDIT.Score", "WellbeingScore",
                "PHQ9.Severity", "GAD7.Severity")

Code variables for anorexia nervosa (AN), bulimia nervosa (BN), or binge-eating disorder (BED) without comorbidity

# SRANpure (no comorbidity)
MHQ["SRANpure"] <- NA
MHQ <- within(MHQ, SRANpure[SRAnorexiaNervosa == 1 & 
                            (SRBulimiaNervosa == 0 | SRSocPhobia == 0 | SRSchizophrenia == 0 |
                               SRPsychosisOther == 0 | SRPersonalityDisorder == 0 | SROtherPhobia == 0 |
                               SRPanicAttacks == 0 | SROCD == 0 | SRManiaBIP == 0 | SRDepression == 0 |
                               SRBingeEating == 0 | SRASD == 0 | SRGADandOthers == 0 | SRAgoraphobia == 0 |
                               SRADHD == 0)] <- 1) 
MHQ <- within(MHQ, SRANpure[is.na(SRANpure) ] <- 0) 
MHQ$SRANpure <- as.factor(MHQ$SRANpure)
summary(MHQ$SRANpure)
     0      1 
156476    882 
# OLD
# 0      1
# 346039  39703

# SRBNpure (no comorbidity)
MHQ["SRBNpure"] <- NA
MHQ <- within(MHQ, SRBNpure[SRBulimiaNervosa == 1 & 
                            (SRAnorexiaNervosa == 0 | SRSocPhobia == 0 | SRSchizophrenia == 0 |
                               SRPsychosisOther == 0 | SRPersonalityDisorder == 0 | SROtherPhobia == 0 |
                               SRPanicAttacks == 0 | SROCD == 0 | SRManiaBIP == 0 | SRDepression == 0 |
                               SRBingeEating == 0 | SRASD == 0 | SRGADandOthers == 0 | SRAgoraphobia == 0 |
                               SRADHD == 0)] <- 1) 
MHQ <- within(MHQ, SRBNpure[is.na(SRBNpure) ] <- 0) 
MHQ$SRBNpure <- as.factor(MHQ$SRBNpure)
summary(MHQ$SRBNpure)
     0      1 
156864    494 
#OLD
# 0      1
# 345716  40026

# SRBEDpure (no comorbidity)
MHQ["SRBEDpure"] <- NA
MHQ <- within(MHQ, SRBEDpure[SRBingeEating == 1 & 
                             (SRAnorexiaNervosa == 0 | SRSocPhobia == 0 | SRSchizophrenia == 0 |
                                SRPsychosisOther == 0 | SRPersonalityDisorder == 0 | SROtherPhobia == 0 |
                                SRPanicAttacks == 0 | SROCD == 0 | SRManiaBIP == 0 | SRDepression == 0 |
                                SRBulimiaNervosa == 0 | SRASD == 0 | SRGADandOthers == 0 | SRAgoraphobia == 0 |
                                SRADHD == 0)] <- 1) 
MHQ <- within(MHQ, SRBEDpure[is.na(SRBEDpure) ] <- 0) 
MHQ$SRBEDpure <- as.factor(MHQ$SRBEDpure)
summary(MHQ$SRBEDpure)
     0      1 
156660    698 
# OLD
# 0      1
# 345868  39874

Import medication

Import dichotomised medication

meds_orig <- fread(file = "data_raw/2019_02_29_BC/med_classes.txt", header = TRUE,
                  data.table = FALSE)
dim(meds_orig)
[1] 502619     16
colnames(meds_orig)[colnames(meds_orig)=="id"] <- "IID"
colnames(meds_orig)[colnames(meds_orig)=="Diabetes"] <- "Antidiabetics"
colnames(meds_orig)[colnames(meds_orig)=="HIV"] <- "AntiretroviralMed"
colnames(meds_orig)[colnames(meds_orig)=="Osteoporosis"] <- "Antiosteoporotics"
colnames(meds_orig)[colnames(meds_orig)=="Thyroid"] <- "Thyreostatics"
colnames(meds_orig)[colnames(meds_orig)=="Tuberculosis_Leprosy"] <- "AntitubercularAntileproticMed"

meds_orig$Anxiolytics <- NULL

# Recode as factor
meds_orig_colnames <- colnames(meds_orig)
meds_origfactor <- meds_orig_colnames[-1]
meds_orig[meds_origfactor] <- lapply(meds_orig[meds_origfactor], factor)

# Numeric columns
# Pregnancy and Menopause are numeric for recoding
meds_orignumeric <- c("IID")

meds_anx <- fread(file = "data_raw/2019_02_29_BC/med_anx.txt", header = TRUE,
                  data.table = FALSE)
colnames(meds_anx)[colnames(meds_anx)=="id"] <- "IID"
meds_anx$Anxiolytics <- as.factor(meds_anx$Anxiolytics)

meds <- merge(meds_orig, meds_anx, all = TRUE)

# Check structure
str(meds)
'data.frame':   502619 obs. of  16 variables:
 $ IID                          : int  1000015 1000027 1000039 1000040 1000053 1000064 1000071 1000088 1000096 1000109 ...
 $ Corticoids                   : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ Antidiabetics                : Factor w/ 2 levels "0","1": 2 2 1 1 1 2 1 1 1 1 ...
 $ Diuretics                    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ Gonadotropins                : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ Growth_Hormone               : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ AntiretroviralMed            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ HRT_Contraceptives           : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
 $ Antiosteoporotics            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ Testosterone                 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ Thyreostatics                : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 1 2 ...
 $ AntitubercularAntileproticMed: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ Antidepressants              : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ Antineoplastics              : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
 $ Antipsychotics               : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ Anxiolytics                  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...

Additional category for metabolic medication and psychotropic medication

# Metabolic medication
meds["MetabolicMed"] <- NA
meds <- within(meds,
               MetabolicMed[Corticoids == 1 | Antidiabetics == 1 | Diuretics == 1 |
                              Gonadotropins == 1 | Growth_Hormone == 1 | AntiretroviralMed == 1 |
                              HRT_Contraceptives == 1 | Antiosteoporotics == 1 | Testosterone == 1 |
                              Thyreostatics == 1 | AntitubercularAntileproticMed == 1 |
                              Antidepressants == 1 | Antineoplastics == 1 |
                              Antipsychotics  == 1] <- 1) 
meds <- within(meds, MetabolicMed[is.na(MetabolicMed)] <- 0) 
meds$MetabolicMed <- as.factor(meds$MetabolicMed)
summary(meds$MetabolicMed)
     0      1 
360327 142292 
meds["PsychotropicMed"] <- NA
meds <- within(meds,
               PsychotropicMed[Corticoids == 1 | Testosterone == 1 |
                              Antidepressants == 1 | Anxiolytics == 1 |
                              Antipsychotics  == 1] <- 1) 
meds <- within(meds, PsychotropicMed[is.na(PsychotropicMed)] <- 0) 
meds$PsychotropicMed <- as.factor(meds$PsychotropicMed)
summary(meds$PsychotropicMed)
     0      1 
450196  52423 

Import ICD diagnoses (main & secondary) and cancer register

dx <- fread(file = "data_raw/2019_02_29_BC/dx_classes.txt", header = TRUE,
                  data.table = FALSE)
dim(dx)
[1] 502619     32
# Recode as factor
dx_colnames <- colnames(dx)
dxfactor <- dx_colnames[-1]
dx[dxfactor] <- lapply(dx[dxfactor], factor)

# Numeric columns
# Pregnancy and Menopause are numeric for recoding
dxnumeric <- c("IID")

# Create variable: any cancer diagnosis from hospital records (HES) or the cancer register
dx["DxCancerAny"] <- NA
dx <- within(dx,
               DxCancerAny[DxCancer1st == 1 | DxCancer2nd == 1 | CancerRegister == 1] <- 1) 
dx <- within(dx, DxCancerAny[is.na(DxCancerAny)] <- 0) 
dx$DxCancerAny <- as.factor(dx$DxCancerAny)
summary(dx$DxCancerAny)
     0      1 
429641  72978 
# Create variable: any diagnoses affecting body composition
dx["DxBodyCompAny"] <- NA
dx <- within(dx,
             DxBodyCompAny[DxCancer1st == 1 | DxCancer2nd == 1 | CancerRegister == 1 |
                             DxConnectiveTissue1st == 1 | DxConnectiveTissue2nd == 1 |
                             DxDiabetes1st == 1 | DxDxdiabetes2nd == 1 |
                             DxEndocrine1st == 1 | DxEndocrine2nd == 1 |
                             DxGlucose1st == 1 | DxGlucose2nd == 1 |
                             DxHIV1st == 1 | DxHIV2nd == 1 |
                             DxIBD1st == 1 | DxIBD2nd == 1 |
                             DxIBS1st == 1 | DxIBS2nd == 1 |
                             DxLiver1st == 1 | DxLiver2nd == 1 |
                             DxPsy1st == 1 | DxPsy2nd == 1 |
                             DxMetabolic1st == 1 | DxMetabolic2nd == 1 |
                             DxMuscles1st == 1 | DxMuscles2nd == 1 |
                             DxPancreatitis1st == 1 | DxPancreatitis2nd == 1 |
                             DxThyroid1st == 1 | DxThyroid2nd == 1 |
                             DxTuberculosis1st == 1 | DxTuberculosis2nd == 1] <- 1) 
dx <- within(dx, DxBodyCompAny[is.na(DxBodyCompAny)] <- 0) 
dx$DxBodyCompAny <- as.factor(dx$DxBodyCompAny)
summary(dx$DxBodyCompAny)
     0      1 
330752 171867 
# Create new data frame
dxmerged <- as.data.frame(dx[,c("IID", "DxCancerAny", "DxBodyCompAny", "CancerRegister")])

# New data frame with merged diagnoses
dxmerged$DxConnectiveTissue <- ifelse(dx$DxConnectiveTissue1st == 1 | dx$DxConnectiveTissue2nd == 1, 1, 0)
dxmerged$DxDiabetes  <- ifelse(dx$DxDiabetes1st == 1 | dx$DxDxdiabetes2nd == 1, 1, 0)
dxmerged$DxEndocrine <- ifelse(dx$DxEndocrine1st == 1 | dx$DxEndocrine2nd == 1, 1, 0)
dxmerged$DxGlucose  <- ifelse(dx$DxGlucose1st == 1 | dx$DxGlucose2nd == 1, 1, 0)
dxmerged$DxHIV  <- ifelse(dx$DxHIV1st == 1 | dx$DxHIV2nd == 1, 1, 0)
dxmerged$DxIBD  <- ifelse(dx$DxIBD1st == 1 | dx$DxIBD2nd == 1, 1, 0)
dxmerged$DxIBS  <- ifelse(dx$DxIBS1st == 1 | dx$DxIBS2nd == 1, 1, 0)
dxmerged$DxLiver  <- ifelse(dx$DxLiver1st == 1 | dx$DxLiver2nd == 1, 1, 0)
dxmerged$DxPsy  <- ifelse(dx$DxPsy1st == 1 | dx$DxPsy2nd == 1, 1, 0)
dxmerged$DxMetabolic  <- ifelse(dx$DxMetabolic1st == 1 | dx$DxMetabolic2nd == 1, 1, 0)
dxmerged$DxMuscles  <- ifelse(dx$DxMuscles1st == 1 | dx$DxMuscles2nd == 1, 1, 0)
dxmerged$DxPancreatitis  <- ifelse(dx$DxPancreatitis1st == 1 | dx$DxPancreatitis2nd == 1, 1, 0)
dxmerged$DxThyroid  <- ifelse(dx$DxThyroid1st == 1 | dx$DxThyroid2nd == 1, 1, 0)
dxmerged$DxTuberculosis  <- ifelse(dx$DxTuberculosis1st == 1 | dx$DxTuberculosis2nd == 1, 1, 0)

# Recode as factor
dxmerged_colnames <- colnames(dxmerged)
dxmergedfactor <- dxmerged_colnames[-3]
dxmerged[dxmergedfactor] <- lapply(dxmerged[dxmergedfactor], factor)

summary(dxmerged)
      IID         DxCancerAny DxBodyCompAny CancerRegister
 1000015:     1   0:429641    0:330752      0:437662      
 1000027:     1   1: 72978    1:171867      1: 64957      
 1000039:     1                                           
 1000040:     1                                           
 1000053:     1                                           
 1000064:     1                                           
 (Other):502613                                           
 DxConnectiveTissue DxDiabetes DxEndocrine DxGlucose  DxHIV     
 0:498947           0:476606   0:499402    0:501410   0:502408  
 1:  3672           1: 26013   1:  3217    1:  1209   1:   211  
                                                                
                                                                
                                                                
                                                                
                                                                
 DxIBD      DxIBS      DxLiver    DxPsy      DxMetabolic DxMuscles 
 0:479134   0:495942   0:496401   0:463308   0:447983    0:501216  
 1: 23485   1:  6677   1:  6218   1: 39311   1: 54636    1:  1403  
                                                                   
                                                                   
                                                                   
                                                                   
                                                                   
 DxPancreatitis DxThyroid  DxTuberculosis
 0:501978       0:481757   0:502334      
 1:   641       1: 20862   1:   285      
                                         
                                         
                                         
                                         
                                         

Import ICD eating disorders

ICDF <- fread(file = "data_raw/2019_03_02_EDcases/ICD_F.txt",
                  header = TRUE,
                  data.table = FALSE)
dim(ICDF)
[1] 502619    525
ICDEDraw <- ICDF %>%
  select(., IID, contains("F50"))

ICDEDraw$F500 <- with(ICDEDraw, ifelse(F500m == 1 | F500s == 1, 1, 0))
ICDEDraw$F501 <- with(ICDEDraw, ifelse(F501m == 1 | F501s == 1, 1, 0))
ICDEDraw$F502 <- with(ICDEDraw, ifelse(F502m == 1 | F502s == 1, 1, 0))
ICDEDraw$F505 <- with(ICDEDraw, ifelse(F505m == 1 | F505s == 1, 1, 0))
ICDEDraw$F508 <- with(ICDEDraw, ifelse(F508m == 1 | F508s == 1, 1, 0))
ICDEDraw$F509 <- with(ICDEDraw, ifelse(F509m == 1 | F509s == 1, 1, 0))

ICDED <- ICDEDraw %>%
  select(., IID, matches("\\d$"))

rm(ICDEDraw)

# Recode as factor
ICDEDfactor <- colnames(ICDED)
ICDED[ICDEDfactor] <- lapply(ICDED[ICDEDfactor], factor)

# Check structure
str(ICDED)
'data.frame':   502619 obs. of  7 variables:
 $ IID : Factor w/ 502619 levels "1000015","1000027",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ F500: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ F501: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ F502: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ F505: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ F508: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ F509: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
summary(ICDED)
      IID         F500       F501       F502       F505       F508      
 1000015:     1   0:502560   0:502616   0:502593   0:502616   0:502613  
 1000027:     1   1:    59   1:     3   1:    26   1:     3   1:     6  
 1000039:     1                                                         
 1000040:     1                                                         
 1000053:     1                                                         
 1000064:     1                                                         
 (Other):502613                                                         
 F509      
 0:502588  
 1:    31  
           
           
           
           
           

Merge ICD diagnoses categories and ICD eating disorder diagnoses

dxICD <- merge(dxmerged, ICDED, all = TRUE, sort = FALSE)

Import self-reported diagnoses

SRillness <- fread(input = "data_raw/2019_02_29_BC/SRillness_columns.txt",
                  header = TRUE,
                  data.table = FALSE)
dim(SRillness)
[1] 502618     88
# Rename ID column
colnames(SRillness)[colnames(SRillness)=="f.eid"] <- "IID"

#Vector with codes for self-reported illness to exclude
SRillness_exclusion = c(1136, 1154, 1155, 1156, 1157, 1158, 1164, 1165, 1192,
                     1193, 1194, 1220, 1222, 1223, 1224, 1225, 1226, 1228,
                     1229, 1230, 1232, 1233, 1234, 1235, 1236, 1237, 1238,
                     1239, 1243, 1252, 1259, 1260, 1262, 1263, 1276, 1286,
                     1287, 1289, 1290, 1291, 1293, 1297, 1308, 1309, 1310,
                     1313, 1322, 1350, 1373, 1376, 1377, 1378, 1379, 1380,
                     1381, 1382, 1383, 1384, 1403, 1404, 1408, 1409, 1410,
                     1428, 1429, 1430, 1431, 1432, 1437, 1439, 1440, 1456,
                     1461, 1462, 1463, 1464, 1468, 1469, 1470, 1477, 1480,
                     1481, 1519, 1520, 1521, 1522, 1531, 1556, 1579, 1580,
                     1604, 1607, 1608, 1609, 1611, 1615, 1617, 1657, 1664, 1682)

# Logic to exclude those ilnesses
# Any visit
SRillness$SRAnyIllnessBC <- 
  with(SRillness, ifelse(!(f.20002.0.0 %in% SRillness_exclusion) & !(f.20002.0.1 %in% SRillness_exclusion) &
           !(f.20002.0.2 %in% SRillness_exclusion) & !(f.20002.0.3 %in% SRillness_exclusion) &
           !(f.20002.0.4 %in% SRillness_exclusion) & !(f.20002.0.5 %in% SRillness_exclusion) &
           !(f.20002.0.6 %in% SRillness_exclusion) & !(f.20002.0.7 %in% SRillness_exclusion) &
           !(f.20002.0.8 %in% SRillness_exclusion) & !(f.20002.0.9 %in% SRillness_exclusion) &
           !(f.20002.0.10 %in% SRillness_exclusion) & !(f.20002.0.11 %in% SRillness_exclusion) &
           !(f.20002.0.12 %in% SRillness_exclusion) & !(f.20002.0.13 %in% SRillness_exclusion) &
           !(f.20002.0.14 %in% SRillness_exclusion) & !(f.20002.0.15 %in% SRillness_exclusion) &
           !(f.20002.0.16 %in% SRillness_exclusion) & !(f.20002.0.17 %in% SRillness_exclusion) &
           !(f.20002.0.18 %in% SRillness_exclusion) & !(f.20002.0.19 %in% SRillness_exclusion) &
           !(f.20002.0.20 %in% SRillness_exclusion) & !(f.20002.0.21 %in% SRillness_exclusion) &
           !(f.20002.0.22 %in% SRillness_exclusion) & !(f.20002.0.23 %in% SRillness_exclusion) & 
           !(f.20002.0.24 %in% SRillness_exclusion) & !(f.20002.0.25 %in% SRillness_exclusion) &
           !(f.20002.0.26 %in% SRillness_exclusion) & !(f.20002.0.27 %in% SRillness_exclusion) &
           !(f.20002.0.28 %in% SRillness_exclusion) &
           !(f.20002.1.0 %in% SRillness_exclusion) &
           !(f.20002.1.1 %in% SRillness_exclusion) & !(f.20002.1.2 %in% SRillness_exclusion) &
           !(f.20002.1.3 %in% SRillness_exclusion) & !(f.20002.1.4 %in% SRillness_exclusion) &
           !(f.20002.1.5 %in% SRillness_exclusion) & !(f.20002.1.6 %in% SRillness_exclusion) &
           !(f.20002.1.7 %in% SRillness_exclusion) & !(f.20002.1.8 %in% SRillness_exclusion) &
           !(f.20002.1.9 %in% SRillness_exclusion) & !(f.20002.1.10 %in% SRillness_exclusion) &
           !(f.20002.1.11 %in% SRillness_exclusion) & !(f.20002.1.12 %in% SRillness_exclusion) &
           !(f.20002.1.13 %in% SRillness_exclusion) & !(f.20002.1.14 %in% SRillness_exclusion) &
           !(f.20002.1.15 %in% SRillness_exclusion) &
           !(f.20002.2.0 %in% SRillness_exclusion) &
           !(f.20002.2.1 %in% SRillness_exclusion) & !(f.20002.2.2 %in% SRillness_exclusion) &
           !(f.20002.2.3 %in% SRillness_exclusion) & !(f.20002.2.4 %in% SRillness_exclusion) &
           !(f.20002.2.5 %in% SRillness_exclusion) & !(f.20002.2.6 %in% SRillness_exclusion) &
           !(f.20002.2.7 %in% SRillness_exclusion) & !(f.20002.2.8 %in% SRillness_exclusion) &
           !(f.20002.2.9 %in% SRillness_exclusion) & !(f.20002.2.10 %in% SRillness_exclusion) &
           !(f.20002.2.11 %in% SRillness_exclusion) & !(f.20002.2.12 %in% SRillness_exclusion) &
           !(f.20002.2.13 %in% SRillness_exclusion) & !(f.20002.2.14 %in% SRillness_exclusion) &
           !(f.20002.2.15 %in% SRillness_exclusion) & !(f.20002.2.16 %in% SRillness_exclusion),
         0, 1))

SRillness$SRAnyIllnessBC <- as.factor(SRillness$SRAnyIllnessBC)
summary(SRillness$SRAnyIllnessBC)
     0      1 
374256 128362 
# Baseline visit .0
SRillness$SRAnyIllnessBC.0 <- 
  with(SRillness, ifelse(!(f.20002.0.0 %in% SRillness_exclusion) & !(f.20002.0.1 %in% SRillness_exclusion) &
           !(f.20002.0.2 %in% SRillness_exclusion) & !(f.20002.0.3 %in% SRillness_exclusion) &
           !(f.20002.0.4 %in% SRillness_exclusion) & !(f.20002.0.5 %in% SRillness_exclusion) &
           !(f.20002.0.6 %in% SRillness_exclusion) & !(f.20002.0.7 %in% SRillness_exclusion) &
           !(f.20002.0.8 %in% SRillness_exclusion) & !(f.20002.0.9 %in% SRillness_exclusion) &
           !(f.20002.0.10 %in% SRillness_exclusion) & !(f.20002.0.11 %in% SRillness_exclusion) &
           !(f.20002.0.12 %in% SRillness_exclusion) & !(f.20002.0.13 %in% SRillness_exclusion) &
           !(f.20002.0.14 %in% SRillness_exclusion) & !(f.20002.0.15 %in% SRillness_exclusion) &
           !(f.20002.0.16 %in% SRillness_exclusion) & !(f.20002.0.17 %in% SRillness_exclusion) &
           !(f.20002.0.18 %in% SRillness_exclusion) & !(f.20002.0.19 %in% SRillness_exclusion) &
           !(f.20002.0.20 %in% SRillness_exclusion) & !(f.20002.0.21 %in% SRillness_exclusion) &
           !(f.20002.0.22 %in% SRillness_exclusion) & !(f.20002.0.23 %in% SRillness_exclusion) & 
           !(f.20002.0.24 %in% SRillness_exclusion) & !(f.20002.0.25 %in% SRillness_exclusion) &
           !(f.20002.0.26 %in% SRillness_exclusion) & !(f.20002.0.27 %in% SRillness_exclusion) &
           !(f.20002.0.28 %in% SRillness_exclusion),
         0, 1))

SRillness$SRAnyIllnessBC.0 <- as.factor(SRillness$SRAnyIllnessBC.0)
summary(SRillness$SRAnyIllnessBC.0)
     0      1 
378244 124374 
# First repeat .1
SRillness$SRAnyIllnessBC.1 <- 
  with(SRillness, ifelse(!(f.20002.1.1 %in% SRillness_exclusion) & !(f.20002.1.2 %in% SRillness_exclusion) &
           !(f.20002.1.3 %in% SRillness_exclusion) & !(f.20002.1.4 %in% SRillness_exclusion) &
           !(f.20002.1.5 %in% SRillness_exclusion) & !(f.20002.1.6 %in% SRillness_exclusion) &
           !(f.20002.1.7 %in% SRillness_exclusion) & !(f.20002.1.8 %in% SRillness_exclusion) &
           !(f.20002.1.9 %in% SRillness_exclusion) & !(f.20002.1.10 %in% SRillness_exclusion) &
           !(f.20002.1.11 %in% SRillness_exclusion) & !(f.20002.1.12 %in% SRillness_exclusion) &
           !(f.20002.1.13 %in% SRillness_exclusion) & !(f.20002.1.14 %in% SRillness_exclusion) &
           !(f.20002.1.15 %in% SRillness_exclusion),
         0, 1))

SRillness$SRAnyIllnessBC.1 <- as.factor(SRillness$SRAnyIllnessBC.1)
summary(SRillness$SRAnyIllnessBC.1)
     0      1 
498211   4407 
# Logic to exclude those ilnesses
# Any visit
SRillness$SRAnyIllnessBC.2 <- 
  with(SRillness, ifelse(!(f.20002.2.0 %in% SRillness_exclusion) &
           !(f.20002.2.1 %in% SRillness_exclusion) & !(f.20002.2.2 %in% SRillness_exclusion) &
           !(f.20002.2.3 %in% SRillness_exclusion) & !(f.20002.2.4 %in% SRillness_exclusion) &
           !(f.20002.2.5 %in% SRillness_exclusion) & !(f.20002.2.6 %in% SRillness_exclusion) &
           !(f.20002.2.7 %in% SRillness_exclusion) & !(f.20002.2.8 %in% SRillness_exclusion) &
           !(f.20002.2.9 %in% SRillness_exclusion) & !(f.20002.2.10 %in% SRillness_exclusion) &
           !(f.20002.2.11 %in% SRillness_exclusion) & !(f.20002.2.12 %in% SRillness_exclusion) &
           !(f.20002.2.13 %in% SRillness_exclusion) & !(f.20002.2.14 %in% SRillness_exclusion) &
           !(f.20002.2.15 %in% SRillness_exclusion) & !(f.20002.2.16 %in% SRillness_exclusion),
         0, 1))

SRillness$SRAnyIllnessBC.2 <- as.factor(SRillness$SRAnyIllnessBC.2)
summary(SRillness$SRAnyIllnessBC.2)
     0      1 
498903   3715 

Self-reported psychiatric disorder

SRPsy_exclusion = c(1286, # depression
                     1287, # anxiety, panic attacks
                     1289, # schizophrenia
                     1290, # self-harm, suicide attempts
                     1291, # mania, bipolar
                     1408, # alcohol
                     1409, # opoid
                     1410, # substance
                     1469, # PTSD
                     1470, # AN, BN, OED
                     1615) # OCD

SRillness$SRAnyPsyIllnessBC <- 
  with(SRillness, ifelse(!(f.20002.0.0 %in% SRPsy_exclusion) & !(f.20002.0.1 %in% SRPsy_exclusion) &
                           !(f.20002.0.2 %in% SRPsy_exclusion) & !(f.20002.0.3 %in% SRPsy_exclusion) &
                           !(f.20002.0.4 %in% SRPsy_exclusion) & !(f.20002.0.5 %in% SRPsy_exclusion) &
                           !(f.20002.0.6 %in% SRPsy_exclusion) & !(f.20002.0.7 %in% SRPsy_exclusion) &
                           !(f.20002.0.8 %in% SRPsy_exclusion) & !(f.20002.0.9 %in% SRPsy_exclusion) &
                           !(f.20002.0.10 %in% SRPsy_exclusion) & !(f.20002.0.11 %in% SRPsy_exclusion) &
                           !(f.20002.0.12 %in% SRPsy_exclusion) & !(f.20002.0.13 %in% SRPsy_exclusion) &
                           !(f.20002.0.14 %in% SRPsy_exclusion) & !(f.20002.0.15 %in% SRPsy_exclusion) &
                           !(f.20002.0.16 %in% SRPsy_exclusion) & !(f.20002.0.17 %in% SRPsy_exclusion) &
                           !(f.20002.0.18 %in% SRPsy_exclusion) & !(f.20002.0.19 %in% SRPsy_exclusion) &
                           !(f.20002.0.20 %in% SRPsy_exclusion) & !(f.20002.0.21 %in% SRPsy_exclusion) &
                           !(f.20002.0.22 %in% SRPsy_exclusion) & !(f.20002.0.23 %in% SRPsy_exclusion) & 
                           !(f.20002.0.24 %in% SRPsy_exclusion) & !(f.20002.0.25 %in% SRPsy_exclusion) &
                           !(f.20002.0.26 %in% SRPsy_exclusion) & !(f.20002.0.27 %in% SRPsy_exclusion) &
                           !(f.20002.0.28 %in% SRPsy_exclusion) &
                           !(f.20002.1.0 %in% SRPsy_exclusion) &
                           !(f.20002.1.1 %in% SRPsy_exclusion) & !(f.20002.1.2 %in% SRPsy_exclusion) &
                           !(f.20002.1.3 %in% SRPsy_exclusion) & !(f.20002.1.4 %in% SRPsy_exclusion) &
                           !(f.20002.1.5 %in% SRPsy_exclusion) & !(f.20002.1.6 %in% SRPsy_exclusion) &
                           !(f.20002.1.7 %in% SRPsy_exclusion) & !(f.20002.1.8 %in% SRPsy_exclusion) &
                           !(f.20002.1.9 %in% SRPsy_exclusion) & !(f.20002.1.10 %in% SRPsy_exclusion) &
                           !(f.20002.1.11 %in% SRPsy_exclusion) & !(f.20002.1.12 %in% SRPsy_exclusion) &
                           !(f.20002.1.13 %in% SRPsy_exclusion) & !(f.20002.1.14 %in% SRPsy_exclusion) &
                           !(f.20002.1.15 %in% SRPsy_exclusion) &
                           !(f.20002.2.0 %in% SRPsy_exclusion) &
                           !(f.20002.2.1 %in% SRPsy_exclusion) & !(f.20002.2.2 %in% SRPsy_exclusion) &
                           !(f.20002.2.3 %in% SRPsy_exclusion) & !(f.20002.2.4 %in% SRPsy_exclusion) &
                           !(f.20002.2.5 %in% SRPsy_exclusion) & !(f.20002.2.6 %in% SRPsy_exclusion) &
                           !(f.20002.2.7 %in% SRPsy_exclusion) & !(f.20002.2.8 %in% SRPsy_exclusion) &
                           !(f.20002.2.9 %in% SRPsy_exclusion) & !(f.20002.2.10 %in% SRPsy_exclusion) &
                           !(f.20002.2.11 %in% SRPsy_exclusion) & !(f.20002.2.12 %in% SRPsy_exclusion) &
                           !(f.20002.2.13 %in% SRPsy_exclusion) & !(f.20002.2.14 %in% SRPsy_exclusion) &
                           !(f.20002.2.15 %in% SRPsy_exclusion) & !(f.20002.2.16 %in% SRPsy_exclusion),
                         0, 1))

SRillness$SRAnyPsyIllness <- as.factor(SRillness$SRAnyPsyIllness)
summary(SRillness$SRAnyPsyIllness)
     0      1 
464923  37695 

Self-reported neurolgical disorder

# Self-reported neurological disorder
SRNeuro_exclusion = c(1082, 1083, 1086, 1524, 1262, 1397, 1683, 1245, 1246,
                      1491, 1425, 1433, 1258, 1263, 1264, 1266, 1244, 1583,
                      1659, 1259, 1240, 1434)

SRillness$SRAnyNeuroIllnessBC <- 
  with(SRillness, ifelse(!(f.20002.0.0 %in% SRNeuro_exclusion) & !(f.20002.0.1 %in% SRNeuro_exclusion) &
                           !(f.20002.0.2 %in% SRNeuro_exclusion) & !(f.20002.0.3 %in% SRNeuro_exclusion) &
                           !(f.20002.0.4 %in% SRNeuro_exclusion) & !(f.20002.0.5 %in% SRNeuro_exclusion) &
                           !(f.20002.0.6 %in% SRNeuro_exclusion) & !(f.20002.0.7 %in% SRNeuro_exclusion) &
                           !(f.20002.0.8 %in% SRNeuro_exclusion) & !(f.20002.0.9 %in% SRNeuro_exclusion) &
                           !(f.20002.0.10 %in% SRNeuro_exclusion) & !(f.20002.0.11 %in% SRNeuro_exclusion) &
                           !(f.20002.0.12 %in% SRNeuro_exclusion) & !(f.20002.0.13 %in% SRNeuro_exclusion) &
                           !(f.20002.0.14 %in% SRNeuro_exclusion) & !(f.20002.0.15 %in% SRNeuro_exclusion) &
                           !(f.20002.0.16 %in% SRNeuro_exclusion) & !(f.20002.0.17 %in% SRNeuro_exclusion) &
                           !(f.20002.0.18 %in% SRNeuro_exclusion) & !(f.20002.0.19 %in% SRNeuro_exclusion) &
                           !(f.20002.0.20 %in% SRNeuro_exclusion) & !(f.20002.0.21 %in% SRNeuro_exclusion) &
                           !(f.20002.0.22 %in% SRNeuro_exclusion) & !(f.20002.0.23 %in% SRNeuro_exclusion) & 
                           !(f.20002.0.24 %in% SRNeuro_exclusion) & !(f.20002.0.25 %in% SRNeuro_exclusion) &
                           !(f.20002.0.26 %in% SRNeuro_exclusion) & !(f.20002.0.27 %in% SRNeuro_exclusion) &
                           !(f.20002.0.28 %in% SRNeuro_exclusion) &
                           !(f.20002.1.0 %in% SRNeuro_exclusion) &
                           !(f.20002.1.1 %in% SRNeuro_exclusion) & !(f.20002.1.2 %in% SRNeuro_exclusion) &
                           !(f.20002.1.3 %in% SRNeuro_exclusion) & !(f.20002.1.4 %in% SRNeuro_exclusion) &
                           !(f.20002.1.5 %in% SRNeuro_exclusion) & !(f.20002.1.6 %in% SRNeuro_exclusion) &
                           !(f.20002.1.7 %in% SRNeuro_exclusion) & !(f.20002.1.8 %in% SRNeuro_exclusion) &
                           !(f.20002.1.9 %in% SRNeuro_exclusion) & !(f.20002.1.10 %in% SRNeuro_exclusion) &
                           !(f.20002.1.11 %in% SRNeuro_exclusion) & !(f.20002.1.12 %in% SRNeuro_exclusion) &
                           !(f.20002.1.13 %in% SRNeuro_exclusion) & !(f.20002.1.14 %in% SRNeuro_exclusion) &
                           !(f.20002.1.15 %in% SRNeuro_exclusion) &
                           !(f.20002.2.0 %in% SRNeuro_exclusion) &
                           !(f.20002.2.1 %in% SRNeuro_exclusion) & !(f.20002.2.2 %in% SRNeuro_exclusion) &
                           !(f.20002.2.3 %in% SRNeuro_exclusion) & !(f.20002.2.4 %in% SRNeuro_exclusion) &
                           !(f.20002.2.5 %in% SRNeuro_exclusion) & !(f.20002.2.6 %in% SRNeuro_exclusion) &
                           !(f.20002.2.7 %in% SRNeuro_exclusion) & !(f.20002.2.8 %in% SRNeuro_exclusion) &
                           !(f.20002.2.9 %in% SRNeuro_exclusion) & !(f.20002.2.10 %in% SRNeuro_exclusion) &
                           !(f.20002.2.11 %in% SRNeuro_exclusion) & !(f.20002.2.12 %in% SRNeuro_exclusion) &
                           !(f.20002.2.13 %in% SRNeuro_exclusion) & !(f.20002.2.14 %in% SRNeuro_exclusion) &
                           !(f.20002.2.15 %in% SRNeuro_exclusion) & !(f.20002.2.16 %in% SRNeuro_exclusion),
                         0, 1))

SRillness$SRAnyNeuroIllness <- as.factor(SRillness$SRAnyNeuroIllness)
summary(SRillness$SRAnyNeuroIllness)
     0      1 
489801  12817 
SRillness_red <- SRillness %>% select(IID,
                                      SRAnyIllnessBC,
                                      SRAnyIllnessBC.0, SRAnyIllnessBC.1, SRAnyIllnessBC.2,
                                      SRAnyPsyIllness,
                                      SRAnyNeuroIllness)

Self-reported CVD illness

SRCVD <- fread(input = "data_raw/2019_02_29_BC/CVD_columns.txt",
                   header = TRUE,
                   data.table = FALSE)
dim(SRCVD)
[1] 502618     13
# Rename ID column
colnames(SRCVD)[colnames(SRCVD)=="f.eid"] <- "IID"

# Extract columns
SRCVD_cols <- colnames(SRCVD[,-1])

# Create new binary column for self-reported CVD
SRCVD$SRHeartAttack <- apply(SRCVD[,SRCVD_cols] == 1, 1, any)
SRCVD$SRHeartAttack <- factor(SRCVD$SRHeartAttack, labels = c(1))

SRCVD$SRAngina <- apply(SRCVD[,SRCVD_cols] == 2, 1, any)
SRCVD$SRAngina <- factor(SRCVD$SRAngina, labels = c(1))

SRCVD$SRStroke <- apply(SRCVD[,SRCVD_cols] == 3, 1, any)
SRCVD$SRStroke <- factor(SRCVD$SRStroke, labels = c(1))

SRCVD$SRHighBloodPressure <- apply(SRCVD[,SRCVD_cols] == 4, 1, any)
SRCVD$SRHighBloodPressure <- factor(SRCVD$SRHighBloodPressure, labels = c(1))

#New data frame
SRCVD_red <- SRCVD %>%
  select(IID, SRHeartAttack, SRAngina, SRStroke, SRHighBloodPressure)

summary(SRCVD_red)
      IID          SRHeartAttack SRAngina      SRStroke     
 Min.   :1000015   1   : 11849   1   : 16490   1   :  7901  
 1st Qu.:2256563   NA's:490769   NA's:486128   NA's:494717  
 Median :3513112                                            
 Mean   :3513108                                            
 3rd Qu.:4769654                                            
 Max.   :6026196                                            
 SRHighBloodPressure
 1   :138100        
 NA's:364518        
                    
                    
                    
                    

Import self-reported cancer

SRcancer <- fread(input = "data_raw/2019_02_29_BC/SRcancer.txt",
                   header = TRUE,
                   data.table = FALSE)

# Extract columns
SRcancer_cols <- colnames(SRcancer[,-1])

# Create new binary column for self-reported cancer
SRcancer$SRAnyCancer <- apply(SRcancer[,SRcancer_cols] == 1, 1, any)

# Recode as factor
SRcancer$SRAnyCancer <- factor(SRcancer$SRAnyCancer, labels = c(0,1))

summary(SRcancer$SRAnyCancer)
     0      1 
459285  43333 
# Vector with codes for self-reported cancer to exclude for body composition
SRcancerBC_exclusion <-c(1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 
                    1012, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 
                    1025, 1026, 1027, 1028, 1031, 1032, 1033, 1034, 1035, 1036, 1037,
                    1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048,
                    1050, 1051, 1052, 1053, 1055, 1056, 1058, 1059, 1060, 1061, 1062,
                    1063, 1064, 1065, 1066, 1067, 1068, 1070, 1071, 1073, 1074, 1075,
                    1076, 1077, 1078, 1079, 1080, 1081, 1082, 1084, 1085, 1086, 1087, 1088)

# Create data frame that only contains the columns for extraction
SRcancerBC <- select(.data = SRcancer, IID, matches(paste(SRcancerBC_exclusion, collapse = "|")))

# Create new binary column if relevant for exclusion for body composition
SRcancer$SRAnyCancerBC <- apply(SRcancerBC[,-1] == 1, 1, any)

# Recode as factor
SRcancer$SRAnyCancerBC <- factor(SRcancer$SRAnyCancerBC, labels = c(0,1))

summary(SRcancer$SRAnyCancerBC)
     0      1 
461639  40979 
# Vector with codes for self-reported cancer to exclude for neurology
SRcancerNeuro_exclusion <-c(1031, 1032)

# Create data frame that only contains the columns for extraction
SRcancerNeuro <- select(.data = SRcancer, IID, matches(paste(SRcancerNeuro_exclusion, collapse = "|")))

# Create new binary column if relevant for exclusion for neurology
SRcancer$SRAnyCancerNeuro <- apply(SRcancerNeuro[,-1] == 1, 1, any)

# Recode as factor
SRcancer$SRAnyCancerNeuro <- factor(SRcancer$SRAnyCancerNeuro, labels = c(0,1))

summary(SRcancer$SRAnyCancerNeuro)
     0      1 
502351    267 
#New data frame
SRcancer_red <- SRcancer %>%
  select("IID", "SRAnyCancer", "SRAnyCancerBC", "SRAnyCancerNeuro")

SRcancerBC_red <- SRcancer %>%
  select("IID", "SRAnyCancerBC")

SRcancer_merge <- merge(SRcancer_red, SRcancerBC_red, all = TRUE, sort = F)

str(SRcancer_merge)
'data.frame':   502618 obs. of  4 variables:
 $ IID             : int  1000015 1000027 1000039 1000040 1000053 1000064 1000071 1000088 1000096 1000109 ...
 $ SRAnyCancerBC   : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
 $ SRAnyCancer     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
 $ SRAnyCancerNeuro: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...

Import diagnosis categories: autoimmune

SRautoimmune.inflammatory <- fread(input = "inflammatory_autoimmune/ICD.dx.txt",
                                   header = TRUE,
                                   data.table = FALSE)

Merge data frames

Merge UKB phenotypes with MHQ, medication (meds), and ICD diagnoses (dxICD)

dim(UKBpheno_merged)
[1] 502618     44
dim(MHQ)
[1] 157358     68
UKBpheno_MHQ <- merge(UKBpheno_merged, MHQ, all.x = TRUE, sort = FALSE)

dim(UKBpheno_MHQ)
[1] 502618    111
dim(meds)
[1] 502619     18
UKBpheno_MHQ_meds <- merge(UKBpheno_MHQ, meds, all.x = TRUE, sort = FALSE)
rm(UKBpheno_MHQ)

dim(UKBpheno_MHQ_meds)
[1] 502618    128
dim(dxICD)
[1] 502619     24
UKBpheno_MHQ_meds_dx <- merge(UKBpheno_MHQ_meds, dxICD, all.x = TRUE, sort = FALSE)
rm(UKBpheno_MHQ_meds)

dim(UKBpheno_MHQ_meds_dx)
[1] 502618    151
dim(SRillness_red)
[1] 502618      7
UKBpheno_MHQ_meds_dx_SRillness <- merge(UKBpheno_MHQ_meds_dx, SRillness_red, all.x = TRUE, sort = FALSE)
rm(UKBpheno_MHQ_meds_dx)

dim(UKBpheno_MHQ_meds_dx_SRillness)
[1] 502618    157
dim(SRcancer_merge)
[1] 502618      4
UKBpheno_MHQ_meds_dx_SRillness_SRcancer <- merge(UKBpheno_MHQ_meds_dx_SRillness, SRcancer_merge, all.x = TRUE, sort = FALSE)
rm(UKBpheno_MHQ_meds_dx_SRillness)

dim(UKBpheno_MHQ_meds_dx_SRillness_SRcancer)
[1] 502618    160
dim(SRCVD_red)
[1] 502618      5
UKBpheno_MHQ_meds_dx_SRillness_SRcancer_SRCVD <- merge(UKBpheno_MHQ_meds_dx_SRillness_SRcancer, SRCVD_red, all.x = TRUE, sort = FALSE)
rm(UKBpheno_MHQ_meds_dx_SRillness_SRcancer)

dim(UKBpheno_MHQ_meds_dx_SRillness_SRcancer_SRCVD)
[1] 502618    164
dim(SRCVD_red)
[1] 502618      5
UKBpheno_MHQ_meds_dx_SRillness_SRcancer_SRCVD_SRautoimmune <- merge(UKBpheno_MHQ_meds_dx_SRillness_SRcancer_SRCVD, SRautoimmune.inflammatory, all.x = TRUE, sort = FALSE)
rm(UKBpheno_MHQ_meds_dx_SRillness_SRcancer_SRCVD)

pheno <- UKBpheno_MHQ_meds_dx_SRillness_SRcancer_SRCVD_SRautoimmune
pheno$SRAnorexiaNervosa.numeric <- as.numeric(as.character(pheno$SRAnorexiaNervosa))
pheno$F500.numeric <- as.integer(as.character(pheno$F500))
pheno$F501.numeric <- as.numeric(as.character(pheno$F501))
pheno$F502.numeric <- as.numeric(as.character(pheno$F502))
pheno$F505.numeric <- as.numeric(as.character(pheno$F505))
pheno$F508.numeric <- as.numeric(as.character(pheno$F508))
pheno$F509.numeric <- as.numeric(as.character(pheno$F509))
pheno$SRBulimiaNervosa.numeric <- as.numeric(as.character(pheno$SRBulimiaNervosa))
pheno$SRBingeEating.numeric <- as.numeric(as.character(pheno$SRBingeEating))

ED.orig.numeric <- c("SRAnorexiaNervosa.numeric",
                     "SRBulimiaNervosa.numeric",
                     "SRBingeEating.numeric",
                     "F500.numeric", "F501.numeric",
                     "F502.numeric",
                     "F505.numeric", "F508.numeric", "F509.numeric")

AN.orig.numeric <- c("SRAnorexiaNervosa.numeric",
                     "F500.numeric", "F501.numeric")

BN.orig.numeric <- c("SRBulimiaNervosa.numeric",
                     "F502.numeric")

upset(pheno,
      sets = ED.orig.numeric)
Warning: Removed 3 rows containing missing values (geom_bar).

EDdataframe <- pheno %>%
  select(ED.orig.numeric) %>%
  filter_at(vars(ED.orig.numeric), any_vars(.==1))

EDdataframe[is.na(EDdataframe)] <- 0

colnames(EDdataframe)
[1] "SRAnorexiaNervosa.numeric" "SRBulimiaNervosa.numeric" 
[3] "SRBingeEating.numeric"     "F500.numeric"             
[5] "F501.numeric"              "F502.numeric"             
[7] "F505.numeric"              "F508.numeric"             
[9] "F509.numeric"             
upset(data = EDdataframe,
      sets = AN.orig.numeric)

upset(data = EDdataframe,
      sets = BN.orig.numeric)

upset(EDdataframe,
      sets = ED.orig.numeric)

pdf(file = paste0("plots/upset_eds_ukb_",date,".pdf"), onefile = FALSE, width = 10, height = 5)
upset(EDdataframe, sets = ED.orig.numeric)
dev.off()
quartz_off_screen 
                2 

Eating disorders crosstables

summary(pheno$SRAnorexiaNervosa)
     0      1   NA's 
156467    891 345260 
summary(pheno$F500)
     0      1 
502559     59 
summary(pheno$F501)
     0      1 
502615      3 
ftable(pheno$SRAnorexiaNervosa, pheno$F500, pheno$F501,
       exclude = NULL)
           0      1
                   
0  0  156465      0
   1       2      0
1  0     877      0
   1      12      2
NA 0  345216      1
   1      43      0
ftable(pheno$SRBulimiaNervosa, pheno$F502,
       exclude = NULL)
         0      1
                 
0   156855      0
1      496      7
NA  345241     19

Create eating disorder cases variable

# Anorexia nervosa
pheno$AN.count <- with(pheno, ifelse(SRAnorexiaNervosa == 1 | F500 == 1 | F501 == 1, 1, 0))
# Bulimia nervosa
pheno$BN.count <- with(pheno, ifelse(SRBulimiaNervosa == 1 | F502 == 1, 1, 0))
# Binge-eating disorder
pheno$BED.count <- with(pheno, ifelse(SRBingeEating == 1, 1, 0))
# EDNOS
pheno$EDNOS.count <- with(pheno, ifelse(F509 == 1, 1, 0))
# Vomiting (Purging disorder)
pheno$PUR.count <- with(pheno, ifelse(F505 == 1, 1, 0))
# Pica
pheno$Pica.count <- with(pheno, ifelse(F508 == 1, 1, 0))

pheno$AN.count.numeric <- as.numeric(as.character(pheno$AN.count))
pheno$BN.count.numeric <- as.numeric(as.character(pheno$BN.count))
pheno$BED.count.numeric <- as.numeric(as.character(pheno$BED.count))
pheno$EDNOS.count.numeric <- as.numeric(as.character(pheno$EDNOS.count))
pheno$PUR.count.numeric <- as.numeric(as.character(pheno$PUR.count))
pheno$Pica.count.numeric <- as.numeric(as.character(pheno$Pica.count))

pheno$AN <- with(pheno, ifelse(AN.count == 0 &
                                 (SRBulimiaNervosa == 1 | F502 == 1 |
                                    SRBingeEating == 1 |
                                    F505 == 1 |
                                    F508 == 1 |
                                    F509 == 1),
                               NA,
                               pheno$AN.count))
pheno$AN.numeric <- as.numeric(as.character(pheno$AN))

pheno$BN <- with(pheno, ifelse(BN.count == 0 &
                                 (SRAnorexiaNervosa == 1 | F500 == 1 | F501 == 1 |
                                    SRBingeEating == 1 |
                                    F505 == 1 |
                                    F508 == 1 |
                                    F509 == 1),
                               NA,
                               pheno$BN.count))
pheno$BN.numeric <- as.numeric(as.character(pheno$BN))

pheno$BED <- with(pheno, ifelse(BED.count == 0 &
                                 (SRAnorexiaNervosa == 1 | F500 == 1 | F501 == 1 |
                                    SRBulimiaNervosa == 1 | F502 == 1 |
                                    F505 == 1 |
                                    F508 == 1 |
                                    F509 == 1),
                               NA,
                               pheno$BED.count))
pheno$BED.numeric <- as.numeric(as.character(pheno$BED))

pheno$ANpure.count <-
  with(pheno,
       ifelse(
         (SRAnorexiaNervosa == 1 | F500 == 1 | F501 == 1) & # self-reported or ICD anorexia nervosa
           (is.na(SRBulimiaNervosa) | SRBulimiaNervosa == 0) & # no self-reported BN (contains NA)
             F502 == 0 & # no ICD Bulimia nervosa
            (is.na(SRBingeEating) | SRBingeEating == 0) & # no self-reported BED (contains NA)
           F505 == 0 & # no purging / vomiting
           F508 == 0 & # no Pica
           F509 == 0, # no EDNOS
       1, 0))

pheno$ANpure <- with(pheno, ifelse(ANpure.count == 0 &
                                 (SRBulimiaNervosa == 1 | F502 == 1 |
                                    SRBingeEating == 1 |
                                    F505 == 1 |
                                    F508 == 1 |
                                    F509 == 1),
                               NA,
                               pheno$ANpure.count))

pheno$ANpure.numeric <- as.numeric(as.character(pheno$ANpure))

pheno$BNpure.count <-
  with(pheno,
       ifelse(
         (SRBulimiaNervosa == 1 | F502 == 1) & # no self-reported or ICD BN (contains NA)
         (is.na(SRAnorexiaNervosa) | SRAnorexiaNervosa == 0) & # no self-reported AN
            F500 == 0 &
           F501 == 0 & # no ICD anorexia nervosa
            (is.na(SRBingeEating) | SRBingeEating == 0) & # no self-reported BED (contains NA)
           F505 == 0 & # no purging / vomiting
           F508 == 0 & # no Pica
           F509 == 0, # no EDNOS
       1, 0))

pheno$BNpure <- with(pheno, ifelse(BNpure.count == 0 &
                                 (SRAnorexiaNervosa == 1 | F500 == 1 | F501 == 1 |
                                    SRBingeEating == 1 |
                                    F505 == 1 |
                                    F508 == 1 |
                                    F509 == 1),
                               NA,
                               pheno$BNpure.count))

pheno$BNpure.numeric <- as.numeric(as.character(pheno$BNpure))

pheno$BEDpure.count <-
  with(pheno,
       ifelse(
         (SRBingeEating == 1) & # no self-reported BED (contains NA)
         (is.na(SRAnorexiaNervosa) | SRAnorexiaNervosa == 0) & # no self-reported AN
          F500 == 0 &
          F501 == 0 & # no ICD anorexia nervosa
          (is.na(SRBulimiaNervosa) | SRBulimiaNervosa == 0) & # no self-reported BN (contains NA)
          F502 == 0 & # no ICD Bulimia nervosa
          F505 == 0 & # no purging / vomiting
          F508 == 0 & # no Pica
          F509 == 0, # no EDNOS
       1, 0))

pheno$BEDpure <- with(pheno, ifelse(BEDpure.count == 0 &
                                 (SRAnorexiaNervosa == 1 | F500 == 1 | F501 == 1 |
                                    SRBulimiaNervosa == 1 | F502 == 1 |
                                    F505 == 1 |
                                    F508 == 1 |
                                    F509 == 1),
                               NA,
                               pheno$BEDpure.count))

pheno$BEDpure.numeric <- as.numeric(as.character(pheno$BEDpure))

pheno$ED <-
  with(pheno,
       ifelse(
         (SRAnorexiaNervosa == 1 | F500 == 1 | F501 == 1) | # Anorexia nervosa (SR, ICD)
           (SRBulimiaNervosa == 1 | F502 == 1) | # Bulimia nervosa (SR, ICD)
           SRBingeEating == 1 | # Binge-eating disorder (SR)
           F509 == 1 | # EDNOS
           F505 == 1 , # Vomiting (Purging disorder)
         1, 0)
       )

pheno$ED <- with(pheno, ifelse(F508 == 1, NA, pheno$ED))

pheno$ED.control <- with(pheno, ifelse(AN.count == 1 | BN.count == 1 | BED.count == 1, 1, 0))
summary(as.factor(pheno$ED.control))
     0      1   NA's 
155506   1912 345200 
# Recode as factor
EDcols <- c("AN.count", "BN.count", "BED.count",
            "EDNOS.count", "PUR.count", "Pica.count",
            "AN", "BN", "BED",
            "ANpure", "BNpure", "BEDpure",
            "ED",
            "SRAnorexiaNervosa", "SRBulimiaNervosa", "SRBingeEating",
            "SREatingDisorderAny",
            "SRANpure", "SRBNpure", "SRBEDpure")
pheno[EDcols] <- lapply(pheno[EDcols], factor)

summary(ICDED)
      IID         F500       F501       F502       F505       F508      
 1000015:     1   0:502560   0:502616   0:502593   0:502616   0:502613  
 1000027:     1   1:    59   1:     3   1:    26   1:     3   1:     6  
 1000039:     1                                                         
 1000040:     1                                                         
 1000053:     1                                                         
 1000064:     1                                                         
 (Other):502613                                                         
 F509      
 0:502588  
 1:    31  
           
           
           
           
           
EDcols.recoded.numeric <- c("SRAnorexiaNervosa.numeric",
                     "SRBulimiaNervosa.numeric",
                     "SRBingeEating.numeric",
                     "F500.numeric", "F501.numeric",
                     "F502.numeric",
                     "F505.numeric", "F508.numeric", "F509.numeric",
                    "AN.numeric", "BN.numeric", "BED.numeric",
                    "ANpure.numeric", "BNpure.numeric", "BEDpure.numeric")

EDrecoded <- pheno %>%
  select(EDcols.recoded.numeric) %>%
  filter_at(vars(EDcols.recoded.numeric), any_vars(.==1))

EDrecoded[is.na(EDrecoded)] <- 0

upset(data = EDrecoded, sets = EDcols.recoded.numeric)

pdf(file = paste0("plots/upset_eds_ukb_recoded",date,".pdf"), onefile = FALSE, width = 10, height = 5)
upset(data = EDrecoded, sets = EDcols.recoded.numeric)
dev.off()
quartz_off_screen 
                2 
dfSummary(pheno[, EDcols])
Data Frame Summary  
pheno  
Dimensions: 502618 x 20  
Duplicates: 502591  

---------------------------------------------------------------------------------------------------------------
No   Variable               Stats / Values   Freqs (% of Valid)   Graph                   Valid      Missing   
---- ---------------------- ---------------- -------------------- ----------------------- ---------- ----------
1    AN.count               1. 0             156465 (99.4%)       IIIIIIIIIIIIIIIIIII     157402     345216    
     [factor]               2. 1                937 ( 0.6%)                               (31.32%)   (68.68%)  

2    BN.count               1. 0             156855 (99.7%)       IIIIIIIIIIIIIIIIIII     157377     345241    
     [factor]               2. 1                522 ( 0.3%)                               (31.31%)   (68.69%)  

3    BED.count              1. 0             156651 (99.6%)       IIIIIIIIIIIIIIIIIII     157358     345260    
     [factor]               2. 1                707 ( 0.4%)                               (31.31%)   (68.69%)  

4    EDNOS.count            1. 0             502587 (100.0%)      IIIIIIIIIIIIIIIIIII     502618     0         
     [factor]               2. 1                 31 (  0.0%)                              (100%)     (0%)      

5    PUR.count              1. 0             502615 (100.0%)      IIIIIIIIIIIIIIIIIII     502618     0         
     [factor]               2. 1                  3 (  0.0%)                              (100%)     (0%)      

6    Pica.count             1. 0             502612 (100.0%)      IIIIIIIIIIIIIIIIIII     502618     0         
     [factor]               2. 1                  6 (  0.0%)                              (100%)     (0%)      

7    AN                     1. 0             155503 (99.4%)       IIIIIIIIIIIIIIIIIII     156440     346178    
     [factor]               2. 1                937 ( 0.6%)                               (31.13%)   (68.87%)  

8    BN                     1. 0             155503 (99.7%)       IIIIIIIIIIIIIIIIIII     156025     346593    
     [factor]               2. 1                522 ( 0.3%)                               (31.04%)   (68.96%)  

9    BED                    1. 0             155503 (99.6%)       IIIIIIIIIIIIIIIIIII     156210     346408    
     [factor]               2. 1                707 ( 0.4%)                               (31.08%)   (68.92%)  

10   ANpure                 1. 0             155503 (99.5%)       IIIIIIIIIIIIIIIIIII     156263     346355    
     [factor]               2. 1                760 ( 0.5%)                               (31.09%)   (68.91%)  

11   BNpure                 1. 0             155503 (99.8%)       IIIIIIIIIIIIIIIIIII     155820     346798    
     [factor]               2. 1                317 ( 0.2%)                               (31%)      (69%)     

12   BEDpure                1. 0             155503 (99.6%)       IIIIIIIIIIIIIIIIIII     156100     346518    
     [factor]               2. 1                597 ( 0.4%)                               (31.06%)   (68.94%)  

13   ED                     1. 0             155503 (98.8%)       IIIIIIIIIIIIIIIIIII     157434     345184    
     [factor]               2. 1               1931 ( 1.2%)                               (31.32%)   (68.68%)  

14   SRAnorexiaNervosa      1. 0             156467 (99.4%)       IIIIIIIIIIIIIIIIIII     157358     345260    
     [factor]               2. 1                891 ( 0.6%)                               (31.31%)   (68.69%)  

15   SRBulimiaNervosa       1. 0             156855 (99.7%)       IIIIIIIIIIIIIIIIIII     157358     345260    
     [factor]               2. 1                503 ( 0.3%)                               (31.31%)   (68.69%)  

16   SRBingeEating          1. 0             156651 (99.6%)       IIIIIIIIIIIIIIIIIII     157358     345260    
     [factor]               2. 1                707 ( 0.4%)                               (31.31%)   (68.69%)  

17   SREatingDisorderAny    1. 0             155507 (98.8%)       IIIIIIIIIIIIIIIIIII     157358     345260    
     [factor]               2. 1               1851 ( 1.2%)                               (31.31%)   (68.69%)  

18   SRANpure               1. 0             156476 (99.4%)       IIIIIIIIIIIIIIIIIII     157358     345260    
     [factor]               2. 1                882 ( 0.6%)                               (31.31%)   (68.69%)  

19   SRBNpure               1. 0             156864 (99.7%)       IIIIIIIIIIIIIIIIIII     157358     345260    
     [factor]               2. 1                494 ( 0.3%)                               (31.31%)   (68.69%)  

20   SRBEDpure              1. 0             156660 (99.6%)       IIIIIIIIIIIIIIIIIII     157358     345260    
     [factor]               2. 1                698 ( 0.4%)                               (31.31%)   (68.69%)  
---------------------------------------------------------------------------------------------------------------
EDcols.recoded <- c("AN.count.numeric", "BN.count.numeric", "BED.count.numeric",
               "EDNOS.count.numeric", "PUR.count.numeric")

EDdataframe.recoded <- pheno %>%
  select(EDcols.recoded) %>%
  filter_at(vars(EDcols.recoded), any_vars(.==1)) %>%
  rename("Anorexia nervosa" = AN.count.numeric) %>%
  rename("Bulimia nervosa" = BN.count.numeric) %>%
  rename("Binge-eating disorder" = BED.count.numeric) %>%
  rename("EDNOS" = EDNOS.count.numeric) %>%
  rename("Purging disorder" = PUR.count.numeric)

EDcols.recoded.new <- c("Purging disorder",
                        "EDNOS",
                        "Binge-eating disorder",
                        "Bulimia nervosa",
                        "Anorexia nervosa")

EDdataframe.recoded[is.na(EDdataframe.recoded)] <- 0

colnames(EDdataframe.recoded)
[1] "Anorexia nervosa"      "Bulimia nervosa"       "Binge-eating disorder"
[4] "EDNOS"                 "Purging disorder"     
upset(EDdataframe.recoded,
      sets = EDcols.recoded.new,
      keep.order = T)

pdf(file = paste0("plots/upset_eds_recoded_ukb_",date,".pdf"), onefile = FALSE, width = 10, height = 5)
upset(EDdataframe.recoded,
      sets = EDcols.recoded.new,
       keep.order = T)
dev.off()
quartz_off_screen 
                2 

Descriptives

Diagnoses

summarytools::dfSummary(pheno[,-1],
                        bootstrap.css     = FALSE,       # Already part of the theme so no need for it
                        plain.ascii       = FALSE,       # One of the essential settings
                        style             = "grid", # Idem.
                        dfSummary.silent  = TRUE,        # Suppresses messages about temporary files
                        footnote          = NA,          # Keeping the results minimalistic
                        subtitle.emphasis = FALSE,
                        graph.magnif = 0.75,
                        tmp.img.dir = "/tmp")

Data Frame Summary

pheno

Dimensions: 502618 x 208
Duplicates: 44

No Variable Stats / Values Freqs (% of Valid) Graph Valid Missing

1

Gender
[factor]

1. Female
2. Male

273405 (54.4%)
229138 (45.6%)

502543
(99.99%)

75
(0.01%)

2

Age
[integer]

Mean (sd) : 56.5 (8.1)
min < med < max:
37 < 58 < 73
IQR (CV) : 13 (0.1)

37 distinct values

502543
(99.99%)

75
(0.01%)

3

Height
[numeric]

Mean (sd) : 168.4 (9.3)
min < med < max:
75 < 168 < 209
IQR (CV) : 13.2 (0.1)

557 distinct values

500004
(99.48%)

2614
(0.52%)

4

Weight
[numeric]

Mean (sd) : 78 (15.9)
min < med < max:
30.1 < 76.4 < 197.7
IQR (CV) : 21 (0.2)

1361 distinct values

492414
(97.97%)

10204
(2.03%)

5

BMI
[numeric]

Mean (sd) : 27.4 (4.8)
min < med < max:
12.8 < 26.7 < 68.4
IQR (CV) : 5.8 (0.2)

485 distinct values

492406
(97.97%)

10212
(2.03%)

6

WC
[numeric]

Mean (sd) : 90.3 (13.5)
min < med < max:
20 < 90 < 197
IQR (CV) : 19 (0.1)

738 distinct values

500383
(99.56%)

2235
(0.44%)

7

HC
[numeric]

Mean (sd) : 103.4 (9.2)
min < med < max:
30 < 102 < 195
IQR (CV) : 11 (0.1)

609 distinct values

500324
(99.54%)

2294
(0.46%)

8

BFPC
[numeric]

Mean (sd) : 31.5 (8.5)
min < med < max:
5 < 31.1 < 69.8
IQR (CV) : 12.5 (0.3)

583 distinct values

492134
(97.91%)

10484
(2.09%)

9

FM
[numeric]

Mean (sd) : 24.9 (9.6)
min < med < max:
5 < 23.3 < 109.8
IQR (CV) : 11.4 (0.4)

856 distinct values

491568
(97.8%)

11050
(2.2%)

10

FFM
[numeric]

Mean (sd) : 53.2 (11.5)
min < med < max:
18.7 < 50.6 < 100
IQR (CV) : 18.8 (0.2)

726 distinct values

492366
(97.96%)

10252
(2.04%)

11

SES
[numeric]

Mean (sd) : -1.3 (3.1)
min < med < max:
-6.3 < -2.1 < 11
IQR (CV) : 4.2 (-2.4)

57721 distinct values

501920
(99.86%)

698
(0.14%)

12

Tobacco_current_orig
[integer]

Mean (sd) : 0.1 (0.4)
min < med < max:
-3 < 0 < 2
IQR (CV) : 0 (3.2)

-3 : 429 ( 0.1%)
0 : 448244 (89.3%)
1 : 39244 ( 7.8%)
2 : 13735 ( 2.7%)

501652
(99.81%)

966
(0.19%)

13

Alcohol_frequency_orig
[integer]

Mean (sd) : 2.9 (1.5)
min < med < max:
-3 < 3 < 6
IQR (CV) : 2 (0.5)

-3 : 605 ( 0.1%)
1 : 101775 (20.3%)
2 : 115446 (23.0%)
3 : 129298 (25.8%)
4 : 55860 (11.1%)
5 : 58013 (11.6%)
6 : 40649 ( 8.1%)

501646
(99.81%)

972
(0.19%)

14

Centre
[factor]

1. 10003
2. 11001
3. 11002
4. 11003
5. 11004
6. 11005
7. 11006
8. 11007
9. 11008
10. 11009
[ 12 others ]

3797 ( 0.8%)
13940 ( 2.8%)
14059 ( 2.8%)
17878 ( 3.6%)
18647 ( 3.7%)
17198 ( 3.4%)
19433 ( 3.9%)
29411 ( 5.9%)
28321 ( 5.6%)
37004 ( 7.4%)
302855 (60.3%)

502543
(99.99%)

75
(0.01%)

15

WHR
[numeric]

Mean (sd) : 0.9 (0.1)
min < med < max:
0.2 < 0.9 < 3
IQR (CV) : 0.1 (0.1)

10728 distinct values

500278
(99.53%)

2340
(0.47%)

16

Menopause_new
[factor]

1. 0
2. 1
3. 2
4. 3
5. 4
6. 5
7. 6

229131 (45.6%)
475 ( 0.1%)
165411 (32.9%)
31171 ( 6.2%)
11732 ( 2.3%)
535 ( 0.1%)
64081 (12.8%)

502536
(99.98%)

82
(0.02%)

17

Pregnancy_no_NA
[factor]

1. 0
2. 1
3. 2
4. 3
5. 4

229131 (45.6%)
842 ( 0.2%)
272191 (54.2%)
150 ( 0.0%)
222 ( 0.0%)

502536
(99.98%)

82
(0.02%)

18

Alcohol_frequency
[ordered, factor]

1. Never
2. Special occasions only
3. One to three times a mont
4. Once or twice a week
5. Three or four times a wee
6. Daily or almost daily

40649 ( 8.1%)
58013 (11.6%)
55860 (11.2%)
129298 (25.8%)
115446 (23.0%)
101775 (20.3%)

501041
(99.69%)

1577
(0.31%)

19

Tobacco_current
[ordered, factor]

1. No
2. Only occasionally
3. Yes, on most or all days

448244 (89.4%)
39244 ( 7.8%)
13735 ( 2.7%)

501223
(99.72%)

1395
(0.28%)

20

WHRadjBMI
[numeric]

Mean (sd) : 0 (0.1)
min < med < max:
-0.6 < 0 < 1.3
IQR (CV) : 0.1 (132691511584721424)

129103 distinct values

492300
(97.95%)

10318
(2.05%)

21

WCadjBMI
[numeric]

Mean (sd) : 0 (7.8)
min < med < max:
-56.9 < 0 < 73.3
IQR (CV) : 11.5 (-131582801925320208)

20620 distinct values

492329
(97.95%)

10289
(2.05%)

22

HCadjBMI
[numeric]

Mean (sd) : 0 (4.7)
min < med < max:
-57.7 < 0 < 77.6
IQR (CV) : 5.9 (20044211184267472)

16568 distinct values

492336
(97.95%)

10282
(2.05%)

23

WHRadjBFPC
[numeric]

Mean (sd) : 0 (0.1)
min < med < max:
-0.7 < 0 < 1.2
IQR (CV) : 0.1 (26203883885074980)

203008 distinct values

492028
(97.89%)

10590
(2.11%)

24

WCadjBFPC
[numeric]

Mean (sd) : 0 (13)
min < med < max:
-63.8 < 0 < 105.7
IQR (CV) : 19.4 (47784820636007160)

32667 distinct values

492057
(97.9%)

10561
(2.1%)

25

HCadjBFPC
[numeric]

Mean (sd) : 0 (7.6)
min < med < max:
-57.6 < -0.3 < 81.1
IQR (CV) : 10.1 (-102076444882764800)

26087 distinct values

492064
(97.9%)

10554
(2.1%)

26

FFMadjHeight
[numeric]

Mean (sd) : 0 (6.8)
min < med < max:
-38.4 < -0.4 < 38.8
IQR (CV) : 8.9 (26135666560807816)

33011 distinct values

492113
(97.91%)

10505
(2.09%)

27

BFPCadjHeight
[numeric]

Mean (sd) : 0 (7.4)
min < med < max:
-35.6 < -0.1 < 36.3
IQR (CV) : 10 (-21797180866540348)

35656 distinct values

491884
(97.86%)

10734
(2.14%)

28

BFPC_trunk
[numeric]

Mean (sd) : 31.2 (8)
min < med < max:
2 < 31.2 < 77.6
IQR (CV) : 10.6 (0.3)

650 distinct values

492112
(97.91%)

10506
(2.09%)

29

FM_trunk
[numeric]

Mean (sd) : 13.7 (5.2)
min < med < max:
0.5 < 13.2 < 59.9
IQR (CV) : 6.5 (0.4)

473 distinct values

492085
(97.9%)

10533
(2.1%)

30

FFM_trunk
[numeric]

Mean (sd) : 29.6 (6)
min < med < max:
2 < 28.2 < 58.8
IQR (CV) : 9.9 (0.2)

426 distinct values

492001
(97.89%)

10617
(2.11%)

31

Impedance_wb
[integer]

Mean (sd) : 600.1 (89)
min < med < max:
51 < 595 < 998
IQR (CV) : 130 (0.1)

787 distinct values

492355
(97.96%)

10263
(2.04%)

32

Ethnicity
[factor]

1. European
2. African
3. Asian
4. Chinese
5. Mixed
6. Other

472161 (94.6%)
8035 ( 1.6%)
9839 ( 2.0%)
1574 ( 0.3%)
2909 ( 0.6%)
4559 ( 0.9%)

499077
(99.3%)

3541
(0.7%)

33

Region
[factor]

1. 1
2. 2
3. 3
4. 4
5. 5
6. 6
7. 7
8. 8
9. 9
10. 11
[ 6 others ]

3 ( 0.0%)
5 ( 0.0%)
21 ( 0.0%)
5 ( 0.0%)
396420 (79.7%)
32481 ( 6.5%)
23070 ( 4.6%)
10100 ( 2.0%)
18 ( 0.0%)
27305 ( 5.5%)
8064 ( 1.6%)

497492
(98.98%)

5126
(1.02%)

34

Income
[factor]

1. -3
2. -1
3. 1
4. 2
5. 3
6. 4
7. 5

49852 (10.0%)
21305 ( 4.3%)
97208 (19.6%)
108180 (21.8%)
110777 (22.3%)
86272 (17.4%)
22932 ( 4.6%)

496526
(98.79%)

6092
(1.21%)

35

Birth_weight
[numeric]

Mean (sd) : 3.3 (0.7)
min < med < max:
0.4 < 3.3 < 10
IQR (CV) : 0.7 (0.2)

366 distinct values

277012
(55.11%)

225606
(44.89%)

36

Menarche_age_at
[integer]

Mean (sd) : 12.5 (2.9)
min < med < max:
-3 < 13 < 25
IQR (CV) : 2 (0.2)

23 distinct values

272937
(54.3%)

229681
(45.7%)

37

Contraceptive_ever
[factor]

1. -3
2. -1
3. 0
4. 1

498 ( 0.2%)
444 ( 0.2%)
51534 (18.9%)
220458 (80.8%)

272934
(54.3%)

229684
(45.7%)

38

HRT
[factor]

1. -3
2. -1
3. 0
4. 1

297 ( 0.1%)
800 ( 0.3%)
167913 (61.5%)
103924 (38.1%)

272934
(54.3%)

229684
(45.7%)

39

Breastfed
[factor]

1. -3
2. -1
3. 0
4. 1

462 ( 0.1%)
117443 (23.4%)
106123 (21.2%)
277617 (55.3%)

501645
(99.81%)

973
(0.19%)

40

VAT
[integer]

Mean (sd) : 1217.3 (904.1)
min < med < max:
0 < 1037 < 6261
IQR (CV) : 1217 (0.7)

2394 distinct values

5109
(1.02%)

497509
(98.98%)

41

BMC_wb
[integer]

Mean (sd) : 2636.7 (569.2)
min < med < max:
632 < 2586.5 < 4717
IQR (CV) : 873.8 (0.2)

1982 distinct values

5170
(1.03%)

497448
(98.97%)

42

BMD_total
[numeric]

Mean (sd) : 1.2 (0.2)
min < med < max:
0.2 < 1.2 < 1.8
IQR (CV) : 0.2 (0.1)

721 distinct values

5170
(1.03%)

497448
(98.97%)

43

BipolarInitialQ
[factor]

1. 1
2. 2

808 (50.0%)
807 (50.0%)

1615
(0.32%)

501003
(99.68%)

44

Age.At.MHQ
[integer]

Mean (sd) : 64 (7.7)
min < med < max:
46 < 65 < 81
IQR (CV) : 12 (0.1)

36 distinct values

157358
(31.31%)

345260
(68.69%)

45

Migrant.Status
[factor]

1. 0
2. 1

145880 (92.8%)
11359 ( 7.2%)

157239
(31.28%)

345379
(68.72%)

46

Highest.Qualification
[factor]

1. ALevel
2. Degree
3. GCSE
4. NoneOfTheAbove
5. Other

21077 (13.5%)
70993 (45.6%)
44910 (28.8%)
10930 ( 7.0%)
7948 ( 5.1%)

155858
(31.01%)

346760
(68.99%)

47

Smoker
[factor]

1. Current
2. Former
3. Never
4. PreferNotToAnswer

11339 ( 7.2%)
55282 (35.1%)
90360 (57.5%)
304 ( 0.2%)

157285
(31.29%)

345333
(68.71%)

48

Longstanding.Illness
[factor]

1. 0
2. 1

110878 (71.9%)
43449 (28.1%)

154327
(30.7%)

348291
(69.3%)

49

Diabetes
[factor]

1. 0
2. 1

151836 (96.7%)
5235 ( 3.3%)

157071
(31.25%)

345547
(68.75%)

50

Cancer
[factor]

1. 0
2. 1

145858 (92.9%)
11102 ( 7.1%)

156960
(31.23%)

345658
(68.77%)

51

CVD
[factor]

1. 0
2. 1

119804 (76.3%)
37303 (23.7%)

157107
(31.26%)

345511
(68.74%)

52

Respiratory
[factor]

1. 0
2. 1

103734 (66.0%)
53445 (34.0%)

157179
(31.27%)

345439
(68.73%)

53

Neuroticism
[integer]

Mean (sd) : 3.9 (3.2)
min < med < max:
0 < 3 < 12
IQR (CV) : 5 (0.8)

13 distinct values

131362
(26.14%)

371256
(73.86%)

54

SRSocPhobia
[factor]

1. 0
2. 1

155396 (98.8%)
1962 ( 1.2%)

157358
(31.31%)

345260
(68.69%)

55

SRSchizophrenia
[factor]

1. 0
2. 1

157201 (99.9%)
157 ( 0.1%)

157358
(31.31%)

345260
(68.69%)

56

SRPsychosisOther
[factor]

1. 0
2. 1

156754 (99.6%)
604 ( 0.4%)

157358
(31.31%)

345260
(68.69%)

57

SRPsychosisAny
[factor]

1. 0
2. 1

156635 (99.5%)
723 ( 0.5%)

157358
(31.31%)

345260
(68.69%)

58

SRPersonalityDisorder
[factor]

1. 0
2. 1

156973 (99.8%)
385 ( 0.2%)

157358
(31.31%)

345260
(68.69%)

59

SROtherPhobia
[factor]

1. 0
2. 1

155205 (98.6%)
2153 ( 1.4%)

157358
(31.31%)

345260
(68.69%)

60

SRPanicAttacks
[factor]

1. 0
2. 1

148654 (94.5%)
8704 ( 5.5%)

157358
(31.31%)

345260
(68.69%)

61

SROCD
[factor]

1. 0
2. 1

156376 (99.4%)
982 ( 0.6%)

157358
(31.31%)

345260
(68.69%)

62

SRManiaBIP
[factor]

1. 0
2. 1

156521 (99.5%)
837 ( 0.5%)

157358
(31.31%)

345260
(68.69%)

63

SRDepression
[factor]

1. 0
2. 1

123936 (78.8%)
33422 (21.2%)

157358
(31.31%)

345260
(68.69%)

64

SRMood
[factor]

1. 0
2. 1

123622 (78.6%)
33736 (21.4%)

157358
(31.31%)

345260
(68.69%)

65

SRBulimiaNervosa
[factor]

1. 0
2. 1

156855 (99.7%)
503 ( 0.3%)

157358
(31.31%)

345260
(68.69%)

66

SRBingeEating
[factor]

1. 0
2. 1

156651 (99.6%)
707 ( 0.4%)

157358
(31.31%)

345260
(68.69%)

67

SRASD
[factor]

1. 0
2. 1

157135 (99.9%)
223 ( 0.1%)

157358
(31.31%)

345260
(68.69%)

68

SRGADandOthers
[factor]

1. 0
2. 1

135323 (86.0%)
22035 (14.0%)

157358
(31.31%)

345260
(68.69%)

69

SRAnorexiaNervosa
[factor]

1. 0
2. 1

156467 (99.4%)
891 ( 0.6%)

157358
(31.31%)

345260
(68.69%)

70

SREatingDisorderAny
[factor]

1. 0
2. 1

155507 (98.8%)
1851 ( 1.2%)

157358
(31.31%)

345260
(68.69%)

71

SRAgoraphobia
[factor]

1. 0
2. 1

156759 (99.6%)
599 ( 0.4%)

157358
(31.31%)

345260
(68.69%)

72

SRAnxietyAny
[factor]

1. 0
2. 1

129412 (82.2%)
27946 (17.8%)

157358
(31.31%)

345260
(68.69%)

73

SRADHD
[factor]

1. 0
2. 1

157225 (99.9%)
133 ( 0.1%)

157358
(31.31%)

345260
(68.69%)

74

SRPNTA
[integer]

Min : 0
Mean : 0
Max : 1

0 : 156825 (99.7%)
1 : 533 ( 0.3%)

157358
(31.31%)

345260
(68.69%)

75

SmithDepression
[factor]

1. 0
2. 1

30883 (72.9%)
11491 (27.1%)

42374
(8.43%)

460244
(91.57%)

76

PHQ9.No.Info
[factor]

1. 0
2. 1

157136 (99.9%)
222 ( 0.1%)

157358
(31.31%)

345260
(68.69%)

77

PHQ9.Screen
[factor]

1. 0
2. 1

116288 (73.9%)
41070 (26.1%)

157358
(31.31%)

345260
(68.69%)

78

PHQ9.Items
[factor]

1. 0
2. 1
3. 2
4. 3
5. 4
6. 5
7. 6
8. 7
9. 8
10. 9

119203 (75.8%)
19732 (12.5%)
8004 ( 5.1%)
3941 ( 2.5%)
2162 ( 1.4%)
1470 ( 0.9%)
1009 ( 0.6%)
865 ( 0.5%)
622 ( 0.4%)
350 ( 0.2%)

157358
(31.31%)

345260
(68.69%)

79

PHQ9.Severity
[integer]

Mean (sd) : 2.8 (3.7)
min < med < max:
0 < 2 < 27
IQR (CV) : 4 (1.3)

28 distinct values

157358
(31.31%)

345260
(68.69%)

80

Depressed.Ever
[factor]

1. 0
2. 1

88647 (70.3%)
37430 (29.7%)

126077
(25.08%)

376541
(74.92%)

81

Depressed.Ever.Severe
[factor]

1. 0
2. 1

120125 (95.3%)
5952 ( 4.7%)

126077
(25.08%)

376541
(74.92%)

82

Recurrent.Depression
[factor]

1. 0
2. 1

42231 (66.6%)
21185 (33.4%)

63416
(12.62%)

439202
(87.38%)

83

Single.Depression
[factor]

1. 0
2. 1

48735 (76.8%)
14681 (23.2%)

63416
(12.62%)

439202
(87.38%)

84

SmithBipolar
[factor]

1. 0
2. 1

41874 (98.8%)
500 ( 1.2%)

42374
(8.43%)

460244
(91.57%)

85

GAD7.Severity
[integer]

Mean (sd) : 2.2 (3.4)
min < med < max:
0 < 0 < 21
IQR (CV) : 3 (1.6)

22 distinct values

157264
(31.29%)

345354
(68.71%)

86

GAD.Ever
[factor]

1. 0
2. 1

96793 (89.7%)
11110 (10.3%)

107903
(21.47%)

394715
(78.53%)

87

GAD.Current
[factor]

1. 0
2. 1

105222 (97.5%)
2679 ( 2.5%)

107901
(21.47%)

394717
(78.53%)

88

AUDIT.Score
[integer]

Mean (sd) : 4.9 (4.2)
min < med < max:
0 < 4 < 40
IQR (CV) : 5 (0.9)

41 distinct values

157358
(31.31%)

345260
(68.69%)

89

Alcohol.Use.Disorder
[factor]

1. 0
2. 1

63455 (66.1%)
32601 (33.9%)

96056
(19.11%)

406562
(80.89%)

90

Cannabis.Ever
[factor]

1. 0
2. 1

122473 (77.9%)
34656 (22.1%)

157129
(31.26%)

345489
(68.74%)

91

Cannabis.Daily
[factor]

1. 0
2. 1

32572 (93.4%)
2313 ( 6.6%)

34885
(6.94%)

467733
(93.06%)

92

Addiction.Ever.SelfReport
[factor]

1. 0
2. 1

146216 (94.0%)
9384 ( 6.0%)

155600
(30.96%)

347018
(69.04%)

93

Addiction.Ever
[factor]

1. 0
2. 1

142276 (93.8%)
9384 ( 6.2%)

151660
(30.17%)

350958
(69.83%)

94

Substance.Addiction.Ever
[factor]

1. 0
2. 1

4378 (46.7%)
5002 (53.3%)

9380
(1.87%)

493238
(98.13%)

95

Alcohol.Dependence.Ever
[factor]

1. 0
2. 1

2489 (72.5%)
946 (27.5%)

3435
(0.68%)

499183
(99.32%)

96

Addiction.Current
[factor]

1. 0
2. 1

3535 (54.2%)
2990 (45.8%)

6525
(1.3%)

496093
(98.7%)

97

Trauma.Childhood
[factor]

1. 0
2. 1

82402 (53.6%)
71241 (46.4%)

153643
(30.57%)

348975
(69.43%)

98

Trauma.Adult
[factor]

1. 0
2. 1

69977 (46.4%)
80955 (53.6%)

150932
(30.03%)

351686
(69.97%)

99

Trauma.Catastrophic
[factor]

1. 0
2. 1

77538 (49.3%)
79788 (50.7%)

157326
(31.3%)

345292
(68.7%)

100

PTSD
[factor]

1. 0
2. 1

146649 (93.6%)
10063 ( 6.4%)

156712
(31.18%)

345906
(68.82%)

101

Self.Harm.Ever
[factor]

1. 0
2. 1

150011 (95.6%)
6872 ( 4.4%)

156883
(31.21%)

345735
(68.79%)

102

Not.Worth.Living
[integer]

Min : 0
Mean : 0.3
Max : 1

0 : 108757 (69.1%)
1 : 48601 (30.9%)

157358
(31.31%)

345260
(68.69%)

103

Self.Harm.Suicide.Attempt
[factor]

1. 0
2. 1

3201 (48.3%)
3426 (51.7%)

6627
(1.32%)

495991
(98.68%)

104

WellbeingScore
[integer]

Mean (sd) : 12.7 (2)
min < med < max:
3 < 13 < 17
IQR (CV) : 3 (0.2)

15 distinct values

152694
(30.38%)

349924
(69.62%)

105

NoSRConditions
[factor]

1. 0
2. 1

54017 (34.3%)
103341 (65.7%)

157358
(31.31%)

345260
(68.69%)

106

MHQ
[factor]

1. 1

157358 (100.0%)

157358
(31.31%)

345260
(68.69%)

107

SRAnyMDX
[factor]

1. 0
2. 1

107259 (68.2%)
50099 (31.8%)

157358
(31.31%)

345260
(68.69%)

108

SRANpure
[factor]

1. 0
2. 1

156476 (99.4%)
882 ( 0.6%)

157358
(31.31%)

345260
(68.69%)

109

SRBNpure
[factor]

1. 0
2. 1

156864 (99.7%)
494 ( 0.3%)

157358
(31.31%)

345260
(68.69%)

110

SRBEDpure
[factor]

1. 0
2. 1

156660 (99.6%)
698 ( 0.4%)

157358
(31.31%)

345260
(68.69%)

111

Corticoids
[factor]

1. 0
2. 1

490339 (97.6%)
12279 ( 2.4%)

502618
(100%)

0
(0%)

112

Antidiabetics
[factor]

1. 0
2. 1

483581 (96.2%)
19037 ( 3.8%)

502618
(100%)

0
(0%)

113

Diuretics
[factor]

1. 0
2. 1

462505 (92.0%)
40113 ( 8.0%)

502618
(100%)

0
(0%)

114

Gonadotropins
[factor]

1. 0
2. 1

502081 (99.9%)
537 ( 0.1%)

502618
(100%)

0
(0%)

115

Growth_Hormone
[factor]

1. 0
2. 1

502446 (100.0%)
172 ( 0.0%)

502618
(100%)

0
(0%)

116

AntiretroviralMed
[factor]

1. 0
2. 1

502195 (99.9%)
423 ( 0.1%)

502618
(100%)

0
(0%)

117

HRT_Contraceptives
[factor]

1. 0
2. 1

478495 (95.2%)
24123 ( 4.8%)

502618
(100%)

0
(0%)

118

Antiosteoporotics
[factor]

1. 0
2. 1

493957 (98.3%)
8661 ( 1.7%)

502618
(100%)

0
(0%)

119

Testosterone
[factor]

1. 0
2. 1

502030 (99.9%)
588 ( 0.1%)

502618
(100%)

0
(0%)

120

Thyreostatics
[factor]

1. 0
2. 1

474247 (94.4%)
28371 ( 5.6%)

502618
(100%)

0
(0%)

121

AntitubercularAntileproticMed
[factor]

1. 0
2. 1

502511 (100.0%)
107 ( 0.0%)

502618
(100%)

0
(0%)

122

Antidepressants
[factor]

1. 0
2. 1

465830 (92.7%)
36788 ( 7.3%)

502618
(100%)

0
(0%)

123

Antineoplastics
[factor]

1. 0
2. 1

495299 (98.5%)
7319 ( 1.5%)

502618
(100%)

0
(0%)

124

Antipsychotics
[factor]

1. 0
2. 1

499662 (99.4%)
2956 ( 0.6%)

502618
(100%)

0
(0%)

125

Anxiolytics
[factor]

1. 0
2. 1

496967 (98.9%)
5651 ( 1.1%)

502618
(100%)

0
(0%)

126

MetabolicMed
[factor]

1. 0
2. 1

360326 (71.7%)
142292 (28.3%)

502618
(100%)

0
(0%)

127

PsychotropicMed
[factor]

1. 0
2. 1

450195 (89.6%)
52423 (10.4%)

502618
(100%)

0
(0%)

128

DxCancerAny
[factor]

1. 0
2. 1

429640 (85.5%)
72978 (14.5%)

502618
(100%)

0
(0%)

129

DxBodyCompAny
[factor]

1. 0
2. 1

330751 (65.8%)
171867 (34.2%)

502618
(100%)

0
(0%)

130

CancerRegister
[factor]

1. 0
2. 1

437661 (87.1%)
64957 (12.9%)

502618
(100%)

0
(0%)

131

DxConnectiveTissue
[factor]

1. 0
2. 1

498946 (99.3%)
3672 ( 0.7%)

502618
(100%)

0
(0%)

132

DxDiabetes
[factor]

1. 0
2. 1

476605 (94.8%)
26013 ( 5.2%)

502618
(100%)

0
(0%)

133

DxEndocrine
[factor]

1. 0
2. 1

499401 (99.4%)
3217 ( 0.6%)

502618
(100%)

0
(0%)

134

DxGlucose
[factor]

1. 0
2. 1

501409 (99.8%)
1209 ( 0.2%)

502618
(100%)

0
(0%)

135

DxHIV
[factor]

1. 0
2. 1

502407 (100.0%)
211 ( 0.0%)

502618
(100%)

0
(0%)

136

DxIBD
[factor]

1. 0
2. 1

479133 (95.3%)
23485 ( 4.7%)

502618
(100%)

0
(0%)

137

DxIBS
[factor]

1. 0
2. 1

495941 (98.7%)
6677 ( 1.3%)

502618
(100%)

0
(0%)

138

DxLiver
[factor]

1. 0
2. 1

496400 (98.8%)
6218 ( 1.2%)

502618
(100%)

0
(0%)

139

DxPsy
[factor]

1. 0
2. 1

463307 (92.2%)
39311 ( 7.8%)

502618
(100%)

0
(0%)

140

DxMetabolic
[factor]

1. 0
2. 1

447982 (89.1%)
54636 (10.9%)

502618
(100%)

0
(0%)

141

DxMuscles
[factor]

1. 0
2. 1

501215 (99.7%)
1403 ( 0.3%)

502618
(100%)

0
(0%)

142

DxPancreatitis
[factor]

1. 0
2. 1

501977 (99.9%)
641 ( 0.1%)

502618
(100%)

0
(0%)

143

DxThyroid
[factor]

1. 0
2. 1

481756 (95.9%)
20862 ( 4.2%)

502618
(100%)

0
(0%)

144

DxTuberculosis
[factor]

1. 0
2. 1

502333 (99.9%)
285 ( 0.1%)

502618
(100%)

0
(0%)

145

F500
[factor]

1. 0
2. 1

502559 (100.0%)
59 ( 0.0%)

502618
(100%)

0
(0%)

146

F501
[factor]

1. 0
2. 1

502615 (100.0%)
3 ( 0.0%)

502618
(100%)

0
(0%)

147

F502
[factor]

1. 0
2. 1

502592 (100.0%)
26 ( 0.0%)

502618
(100%)

0
(0%)

148

F505
[factor]

1. 0
2. 1

502615 (100.0%)
3 ( 0.0%)

502618
(100%)

0
(0%)

149

F508
[factor]

1. 0
2. 1

502612 (100.0%)
6 ( 0.0%)

502618
(100%)

0
(0%)

150

F509
[factor]

1. 0
2. 1

502587 (100.0%)
31 ( 0.0%)

502618
(100%)

0
(0%)

151

SRAnyIllnessBC
[factor]

1. 0
2. 1

374256 (74.5%)
128362 (25.5%)

502618
(100%)

0
(0%)

152

SRAnyIllnessBC.0
[factor]

1. 0
2. 1

378244 (75.2%)
124374 (24.8%)

502618
(100%)

0
(0%)

153

SRAnyIllnessBC.1
[factor]

1. 0
2. 1

498211 (99.1%)
4407 ( 0.9%)

502618
(100%)

0
(0%)

154

SRAnyIllnessBC.2
[factor]

1. 0
2. 1

498903 (99.3%)
3715 ( 0.7%)

502618
(100%)

0
(0%)

155

SRAnyPsyIllness
[factor]

1. 0
2. 1

464923 (92.5%)
37695 ( 7.5%)

502618
(100%)

0
(0%)

156

SRAnyNeuroIllness
[factor]

1. 0
2. 1

489801 (97.5%)
12817 ( 2.5%)

502618
(100%)

0
(0%)

157

SRAnyCancerBC
[factor]

1. 0
2. 1

461639 (91.8%)
40979 ( 8.2%)

502618
(100%)

0
(0%)

158

SRAnyCancer
[factor]

1. 0
2. 1

459285 (91.4%)
43333 ( 8.6%)

502618
(100%)

0
(0%)

159

SRAnyCancerNeuro
[factor]

1. 0
2. 1

502351 (100.0%)
267 ( 0.1%)

502618
(100%)

0
(0%)

160

SRHeartAttack
[factor]

1. 1

11849 (100.0%)

11849
(2.36%)

490769
(97.64%)

161

SRAngina
[factor]

1. 1

16490 (100.0%)

16490
(3.28%)

486128
(96.72%)

162

SRStroke
[factor]

1. 1

7901 (100.0%)

7901
(1.57%)

494717
(98.43%)

163

SRHighBloodPressure
[factor]

1. 1

138100 (100.0%)

138100
(27.48%)

364518
(72.52%)

164

ICD.autoimmune
[integer]

Min : 0
Mean : 0.1
Max : 1

0 : 460599 (91.6%)
1 : 42019 ( 8.4%)

502618
(100%)

0
(0%)

165

ICD.autoinflammatory
[integer]

Min : 0
Mean : 0.3
Max : 1

0 : 352521 (70.1%)
1 : 150097 (29.9%)

502618
(100%)

0
(0%)

166

ICD.immunodeficiency
[integer]

Min : 0
Mean : 0
Max : 1

0 : 502049 (99.9%)
1 : 569 ( 0.1%)

502618
(100%)

0
(0%)

167

ICD.memory
[integer]

Min : 0
Mean : 0
Max : 1

0 : 497862 (99.1%)
1 : 4756 ( 0.9%)

502618
(100%)

0
(0%)

168

ICD.metabolic
[integer]

Min : 0
Mean : 0.2
Max : 1

0 : 418253 (83.2%)
1 : 84365 (16.8%)

502618
(100%)

0
(0%)

169

ICD.psychiatric
[integer]

Min : 0
Mean : 0.1
Max : 1

0 : 463307 (92.2%)
1 : 39311 ( 7.8%)

502618
(100%)

0
(0%)

170

ICD.immunodysregulation
[integer]

Min : 0
Mean : 0.3
Max : 1

0 : 345449 (68.7%)
1 : 157169 (31.3%)

502618
(100%)

0
(0%)

171

SRAnorexiaNervosa.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 156467 (99.4%)
1 : 891 ( 0.6%)

157358
(31.31%)

345260
(68.69%)

172

F500.numeric
[integer]

Min : 0
Mean : 0
Max : 1

0 : 502559 (100.0%)
1 : 59 ( 0.0%)

502618
(100%)

0
(0%)

173

F501.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 502615 (100.0%)
1 : 3 ( 0.0%)

502618
(100%)

0
(0%)

174

F502.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 502592 (100.0%)
1 : 26 ( 0.0%)

502618
(100%)

0
(0%)

175

F505.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 502615 (100.0%)
1 : 3 ( 0.0%)

502618
(100%)

0
(0%)

176

F508.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 502612 (100.0%)
1 : 6 ( 0.0%)

502618
(100%)

0
(0%)

177

F509.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 502587 (100.0%)
1 : 31 ( 0.0%)

502618
(100%)

0
(0%)

178

SRBulimiaNervosa.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 156855 (99.7%)
1 : 503 ( 0.3%)

157358
(31.31%)

345260
(68.69%)

179

SRBingeEating.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 156651 (99.6%)
1 : 707 ( 0.4%)

157358
(31.31%)

345260
(68.69%)

180

AN.count
[factor]

1. 0
2. 1

156465 (99.4%)
937 ( 0.6%)

157402
(31.32%)

345216
(68.68%)

181

BN.count
[factor]

1. 0
2. 1

156855 (99.7%)
522 ( 0.3%)

157377
(31.31%)

345241
(68.69%)

182

BED.count
[factor]

1. 0
2. 1

156651 (99.6%)
707 ( 0.4%)

157358
(31.31%)

345260
(68.69%)

183

EDNOS.count
[factor]

1. 0
2. 1

502587 (100.0%)
31 ( 0.0%)

502618
(100%)

0
(0%)

184

PUR.count
[factor]

1. 0
2. 1

502615 (100.0%)
3 ( 0.0%)

502618
(100%)

0
(0%)

185

Pica.count
[factor]

1. 0
2. 1

502612 (100.0%)
6 ( 0.0%)

502618
(100%)

0
(0%)

186

AN.count.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 156465 (99.4%)
1 : 937 ( 0.6%)

157402
(31.32%)

345216
(68.68%)

187

BN.count.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 156855 (99.7%)
1 : 522 ( 0.3%)

157377
(31.31%)

345241
(68.69%)

188

BED.count.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 156651 (99.6%)
1 : 707 ( 0.4%)

157358
(31.31%)

345260
(68.69%)

189

EDNOS.count.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 502587 (100.0%)
1 : 31 ( 0.0%)

502618
(100%)

0
(0%)

190

PUR.count.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 502615 (100.0%)
1 : 3 ( 0.0%)

502618
(100%)

0
(0%)

191

Pica.count.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 502612 (100.0%)
1 : 6 ( 0.0%)

502618
(100%)

0
(0%)

192

AN
[factor]

1. 0
2. 1

155503 (99.4%)
937 ( 0.6%)

156440
(31.13%)

346178
(68.87%)

193

AN.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 155503 (99.4%)
1 : 937 ( 0.6%)

156440
(31.13%)

346178
(68.87%)

194

BN
[factor]

1. 0
2. 1

155503 (99.7%)
522 ( 0.3%)

156025
(31.04%)

346593
(68.96%)

195

BN.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 155503 (99.7%)
1 : 522 ( 0.3%)

156025
(31.04%)

346593
(68.96%)

196

BED
[factor]

1. 0
2. 1

155503 (99.6%)
707 ( 0.4%)

156210
(31.08%)

346408
(68.92%)

197

BED.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 155503 (99.6%)
1 : 707 ( 0.4%)

156210
(31.08%)

346408
(68.92%)

198

ANpure.count
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 156680 (99.5%)
1 : 760 ( 0.5%)

157440
(31.32%)

345178
(68.68%)

199

ANpure
[factor]

1. 0
2. 1

155503 (99.5%)
760 ( 0.5%)

156263
(31.09%)

346355
(68.91%)

200

ANpure.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 155503 (99.5%)
1 : 760 ( 0.5%)

156263
(31.09%)

346355
(68.91%)

201

BNpure.count
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 157123 (99.8%)
1 : 317 ( 0.2%)

157440
(31.32%)

345178
(68.68%)

202

BNpure
[factor]

1. 0
2. 1

155503 (99.8%)
317 ( 0.2%)

155820
(31%)

346798
(69%)

203

BNpure.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 155503 (99.8%)
1 : 317 ( 0.2%)

155820
(31%)

346798
(69%)

204

BEDpure.count
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 156843 (99.6%)
1 : 597 ( 0.4%)

157440
(31.32%)

345178
(68.68%)

205

BEDpure
[factor]

1. 0
2. 1

155503 (99.6%)
597 ( 0.4%)

156100
(31.06%)

346518
(68.94%)

206

BEDpure.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 155503 (99.6%)
1 : 597 ( 0.4%)

156100
(31.06%)

346518
(68.94%)

207

ED
[factor]

1. 0
2. 1

155503 (98.8%)
1931 ( 1.2%)

157434
(31.32%)

345184
(68.68%)

208

ED.control
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 155506 (98.8%)
1 : 1912 ( 1.2%)

157418
(31.32%)

345200
(68.68%)

Import Principal Components

pcs <- fread(file = "data_raw/2019_02_29_BC/pcs_pcas.txt",
             header = TRUE, data.table = FALSE)
dim(pcs)
[1] 385753     17
# Select 10 PCs
pcs_reduced <- pcs[,2:12]
dim(pcs_reduced)
[1] 385753     11
colnames(pcs_reduced)
 [1] "IID"  "PC1"  "PC2"  "PC3"  "PC4"  "PC5"  "PC6"  "PC7"  "PC8"  "PC9" 
[11] "PC10"

Batch and array type

batch_array <- fread(file = "data_raw/2019_02_29_BC/2754_Batch_Array.txt",
                     header = FALSE, data.table = FALSE)
# Add column names
colnames(batch_array) <- c("IID", "Array", "Batch")
batch_array$Array <- as.factor(batch_array$Array)
batch_array$Batch <- as.factor(batch_array$Batch)
dim(batch_array)
[1] 488377      3

Import and merge genetic IIDs (GID) which survived genetic quality control

MAF0.01_GENO0.02_MIND0.02_CAUC1_UKBQC1_UNREL0.044_HWE0.00000001_SEX1

GID <- fread(file = "data_raw/2019_02_29_BC/GID_list.txt",
             header = TRUE,
             data.table = FALSE)
GID$Passed_GQC <- factor(GID$Passed_GQC, levels = c(0, 1, 2), labels = c(0, "Male", "Female"))
summary(GID$Passed_GQC)
     0   Male Female 
    10 177555 208188 
GID$EuropeanGenetic <- 1
summary(GID$Passed_GQC)
     0   Male Female 
    10 177555 208188 
str(GID)
'data.frame':   385753 obs. of  3 variables:
 $ IID            : int  1993198 4439466 3643257 2033164 1890169 2373695 5873836 5835698 4946986 3939224 ...
 $ Passed_GQC     : Factor w/ 3 levels "0","Male","Female": 2 3 3 3 3 3 3 3 3 3 ...
 $ EuropeanGenetic: num  1 1 1 1 1 1 1 1 1 1 ...
dim(GID)
[1] 385753      3
dim(pheno)
[1] 502618    209
GID_pheno <- merge(GID, pheno, all = FALSE, sort = FALSE)
dim(GID_pheno)
[1] 385743    211

Merge genetic information

Merge genetic IDs and UKB phenotypes with MHQ, medication (meds), and ICD diagnoses (dxICD)

pcs_batch_array <- merge(pcs_reduced, batch_array, all = TRUE, sort = FALSE)


dim(pcs_batch_array)
[1] 488377     13
GID_pheno_pcs_batch_array <- merge(GID_pheno,
        pcs_batch_array,
        all.x = TRUE,
        sort = FALSE)

GIDpheno <- GID_pheno_pcs_batch_array
dim(GIDpheno)
[1] 385743    223
dim(GID)
[1] 385753      3
dim(pheno)
[1] 502618    209
GID_pheno_all <- merge(GID, pheno, all = TRUE, sort = FALSE)
dim(GID_pheno_all)
[1] 502628    211
GID_pheno_all_genetic <- merge(GID_pheno_all, pcs_batch_array, all = TRUE, sort = FALSE)
dim(GID_pheno_all_genetic)
[1] 502630    223

Extract complete cases

GIDpheno_cc <- GIDpheno[complete.cases(GIDpheno[,cols_cc]),]
cases_incomplete <- nrow(GIDpheno_cc)-nrow(GIDpheno)
cases_incomplete
[1] -8310
dim(GIDpheno_cc)
[1] 377433    223
sum(is.na(GIDpheno_cc[,cols_cc]))
[1] 0
summary(GIDpheno_cc$Gender)
Female   Male 
204244 173189 

Female and male

# Female
GIDpheno_cc_female <- subset(GIDpheno_cc, Gender == "Female")

dim(GIDpheno_cc_female)
[1] 204244    223
# 208188   325

# Male
GIDpheno_cc_male <- subset(GIDpheno_cc, Gender == "Male")
dim(GIDpheno_cc_male)
[1] 173189    223
# 177554   325

Write phenotype file: European participants with complete body composition data including genetic information and participants with hysterectomy

write.table(GIDpheno_cc,
            file = paste("data/BCpheno",date,".txt", sep =""),
            quote = T,
            sep = "\t",
            col.names = T,
            row.names = F)

Exclusion body composition: 300K

Recode menopause to binary Recodes every female with hysterctomy to NA

# Empty column
GIDpheno_cc["Menopause_bin"] <- NA
# Male & NA -> 0
GIDpheno_cc <- within(GIDpheno_cc, Menopause_bin[Menopause_new == 0] <- 0)
# Female & NA -> NA
GIDpheno_cc <- within(GIDpheno_cc, Menopause_bin[Menopause_new == 1] <- NA)
# Female & Yes -> 1
GIDpheno_cc <- within(GIDpheno_cc, Menopause_bin[Menopause_new == 2] <- 1)
# Female & Hysterectomy -> NA
GIDpheno_cc <- within(GIDpheno_cc, Menopause_bin[Menopause_new == 3] <- NA)
# Female & Not sure -> 0
GIDpheno_cc <- within(GIDpheno_cc, Menopause_bin[Menopause_new == 4] <- 0)
#Female & Prefer not to answer
GIDpheno_cc <- within(GIDpheno_cc, Menopause_bin[Menopause_new == 5] <- NA)
# Female & No
GIDpheno_cc <- within(GIDpheno_cc, Menopause_bin[Menopause_new == 6] <- 0)
# Recode as factor
GIDpheno_cc$Menopause_bin <- as.factor(GIDpheno_cc$Menopause_bin)

summary(GIDpheno_cc$Menopause_bin)
     0      1   NA's 
228955 124974  23504 
# OLD
#     0      1   NA's
# 234273 127330  24139

Exclude hysterectomy and pregnancy 0) male & NA 1) female & NA 2) female & yes 3) female & hysterectomy 4) female & not sure 5) female & prefer not to answer 6) female & no

GIDpheno_cc_menopause <- subset(GIDpheno_cc, Menopause_new == 0 | Menopause_new == 2 | Menopause_new == 4 | Menopause_new == 6)

dim(GIDpheno_cc_menopause)
[1] 353929    224
# 361603    325

# Menopause prior to subsetting
summary(GIDpheno_cc$Menopause_new)
     0      1      2      3      4      5      6 
173189      1 124974  23334   8747    169  47019 
#0          1        2      3      4      5      6      NA's
#177552    128  127330  23826   8896    183  47825      2

# Menopause after subsetting
summary(GIDpheno_cc_menopause$Menopause_new)
     0      1      2      3      4      5      6 
173189      0 124974      0   8747      0  47019 
# 0           1      2        3        4         5      6
# 177552      0     127330     0     8896      0     47825

Exclude pregnant women 0) male & NA 1) female & NA 2) female & no 3) female & yes 4) female & unsure

GIDpheno_cc_menopause_pregnancy <- subset(GIDpheno_cc_menopause, Pregnancy_no_NA == 0 | Pregnancy_no_NA == 2)

### Prior to subsetting of hysterectomy and menopause
summary(GIDpheno_cc$Pregnancy_no_NA)
     0      1      2      3      4 
173189      0 204244      0      0 
#0          1      2      3      4   NA's
#177552    209 207720    105    154      2

### Prior to subsetting of pregnancy
summary(GIDpheno_cc_menopause$Pregnancy_no_NA)
     0      1      2      3      4 
173189      0 180740      0      0 
#     0      1      2      3      4
#177552    174 183623    104    150

### After subsetting of pregnancy
summary(GIDpheno_cc_menopause_pregnancy$Pregnancy_no_NA)
     0      1      2      3      4 
173189      0 180740      0      0 
#0           1      2      3      4
#177552      0 183623      0      0

# Double check: there should be no NAs anymore in the binary Menopause variable
summary(GIDpheno_cc_menopause_pregnancy$Menopause_bin)
     0      1 
228955 124974 
# 0      1
# 233987 127188

dim(GIDpheno_cc_menopause_pregnancy)
[1] 353929    224
# 361175 321

Complete cases

without hysterectomy and pregnancy including diagnoses, medication, selfreport, and genetic variables

OLD:353972 321

Move data frame

complete <- GIDpheno_cc_menopause_pregnancy

Create female and male subset

# Female
complete_female <- subset(complete, Gender == "Female")
dim(complete_female)
[1] 180740    224
# 180765   321

# Male
complete_male <- subset(complete, Gender == "Male")
dim(complete_male)
[1] 173189    224
# 173207   321

Descriptives: Complete (300K)

summarytools::dfSummary(complete[,-1],
                        bootstrap.css     = FALSE,       # Already part of the theme so no need for it
                        plain.ascii       = FALSE,       # One of the essential settings
                        style             = "grid", # Idem.
                        dfSummary.silent  = TRUE,        # Suppresses messages about temporary files
                        footnote          = NA,          # Keeping the results minimalistic
                        subtitle.emphasis = FALSE,
                        graph.magnif = 0.75,
                        tmp.img.dir = "/tmp")

Data Frame Summary

complete

Dimensions: 353929 x 223
Duplicates: 0

No Variable Stats / Values Freqs (% of Valid) Graph Valid Missing

1

Passed_GQC
[factor]

1. 0
2. Male
3. Female

0 ( 0.0%)
173189 (48.9%)
180740 (51.1%)

353929
(100%)

0
(0%)

2

EuropeanGenetic
[numeric]

1 distinct value

1 : 353929 (100.0%)

353929
(100%)

0
(0%)

3

Gender
[factor]

1. Female
2. Male

180740 (51.1%)
173189 (48.9%)

353929
(100%)

0
(0%)

4

Age
[integer]

Mean (sd) : 56.6 (8.1)
min < med < max:
38 < 58 < 73
IQR (CV) : 13 (0.1)

36 distinct values

353929
(100%)

0
(0%)

5

Height
[numeric]

Mean (sd) : 169.2 (9.2)
min < med < max:
121 < 169 < 209
IQR (CV) : 14 (0.1)

535 distinct values

353929
(100%)

0
(0%)

6

Weight
[numeric]

Mean (sd) : 78.5 (15.9)
min < med < max:
34 < 77 < 197.7
IQR (CV) : 21.1 (0.2)

1288 distinct values

353929
(100%)

0
(0%)

7

BMI
[numeric]

Mean (sd) : 27.3 (4.7)
min < med < max:
14.5 < 26.7 < 68.4
IQR (CV) : 5.7 (0.2)

462 distinct values

353929
(100%)

0
(0%)

8

WC
[numeric]

Mean (sd) : 90.4 (13.5)
min < med < max:
20 < 90 < 197
IQR (CV) : 18 (0.1)

704 distinct values

353929
(100%)

0
(0%)

9

HC
[numeric]

Mean (sd) : 103.3 (9)
min < med < max:
39 < 102 < 195
IQR (CV) : 11 (0.1)

558 distinct values

353929
(100%)

0
(0%)

10

BFPC
[numeric]

Mean (sd) : 30.9 (8.4)
min < med < max:
6.5 < 30.4 < 69.8
IQR (CV) : 12.2 (0.3)

556 distinct values

353929
(100%)

0
(0%)

11

FM
[numeric]

Mean (sd) : 24.5 (9.4)
min < med < max:
5 < 23 < 109.8
IQR (CV) : 11.1 (0.4)

827 distinct values

353929
(100%)

0
(0%)

12

FFM
[numeric]

Mean (sd) : 54 (11.6)
min < med < max:
26.2 < 52.2 < 100
IQR (CV) : 19.3 (0.2)

706 distinct values

353929
(100%)

0
(0%)

13

SES
[numeric]

Mean (sd) : -1.5 (3)
min < med < max:
-6.3 < -2.3 < 11
IQR (CV) : 3.9 (-2)

52279 distinct values

353929
(100%)

0
(0%)

14

Tobacco_current_orig
[integer]

Mean (sd) : 0.1 (0.4)
min < med < max:
-3 < 0 < 2
IQR (CV) : 0 (3.2)

-3 : 191 ( 0.1%)
0 : 316964 (89.6%)
1 : 27059 ( 7.6%)
2 : 9715 ( 2.7%)

353929
(100%)

0
(0%)

15

Alcohol_frequency_orig
[integer]

Mean (sd) : 2.8 (1.5)
min < med < max:
-3 < 3 < 6
IQR (CV) : 2 (0.5)

-3 : 261 ( 0.1%)
1 : 77316 (21.9%)
2 : 86319 (24.4%)
3 : 92740 (26.2%)
4 : 38701 (10.9%)
5 : 36262 (10.2%)
6 : 22330 ( 6.3%)

353929
(100%)

0
(0%)

16

Centre
[factor]

1. 10003
2. 11001
3. 11002
4. 11003
5. 11004
6. 11005
7. 11006
8. 11007
9. 11008
10. 11009
[ 12 others ]

317 ( 0.1%)
9795 ( 2.8%)
10713 ( 3.0%)
13100 ( 3.7%)
14005 ( 4.0%)
12987 ( 3.7%)
13582 ( 3.8%)
22701 ( 6.4%)
20489 ( 5.8%)
25518 ( 7.2%)
210722 (59.5%)

353929
(100%)

0
(0%)

17

WHR
[numeric]

Mean (sd) : 0.9 (0.1)
min < med < max:
0.2 < 0.9 < 2.1
IQR (CV) : 0.1 (0.1)

8666 distinct values

353929
(100%)

0
(0%)

18

Menopause_new
[factor]

1. 0
2. 1
3. 2
4. 3
5. 4
6. 5
7. 6

173189 (48.9%)
0 ( 0.0%)
124974 (35.3%)
0 ( 0.0%)
8747 ( 2.5%)
0 ( 0.0%)
47019 (13.3%)

353929
(100%)

0
(0%)

19

Pregnancy_no_NA
[factor]

1. 0
2. 1
3. 2
4. 3
5. 4

173189 (48.9%)
0 ( 0.0%)
180740 (51.1%)
0 ( 0.0%)
0 ( 0.0%)

353929
(100%)

0
(0%)

20

Alcohol_frequency
[ordered, factor]

1. Never
2. Special occasions only
3. One to three times a mont
4. Once or twice a week
5. Three or four times a wee
6. Daily or almost daily

22330 ( 6.3%)
36262 (10.2%)
38701 (10.9%)
92740 (26.2%)
86319 (24.4%)
77316 (21.9%)

353668
(99.93%)

261
(0.07%)

21

Tobacco_current
[ordered, factor]

1. No
2. Only occasionally
3. Yes, on most or all days

316964 (89.6%)
27059 ( 7.6%)
9715 ( 2.8%)

353738
(99.95%)

191
(0.05%)

22

WHRadjBMI
[numeric]

Mean (sd) : 0 (0.1)
min < med < max:
-0.6 < 0 < 1.3
IQR (CV) : 0.1 (26.3)

108094 distinct values

353929
(100%)

0
(0%)

23

WCadjBMI
[numeric]

Mean (sd) : 0.4 (7.8)
min < med < max:
-56.9 < 0.6 < 73.3
IQR (CV) : 11.5 (20.8)

17817 distinct values

353929
(100%)

0
(0%)

24

HCadjBMI
[numeric]

Mean (sd) : 0.1 (4.6)
min < med < max:
-57.7 < 0 < 77.6
IQR (CV) : 5.8 (85.6)

14219 distinct values

353929
(100%)

0
(0%)

25

WHRadjBFPC
[numeric]

Mean (sd) : 0 (0.1)
min < med < max:
-0.7 < 0 < 1.2
IQR (CV) : 0.1 (61.4)

167707 distinct values

353929
(100%)

0
(0%)

26

WCadjBFPC
[numeric]

Mean (sd) : 0.4 (13.1)
min < med < max:
-63.8 < 0.6 < 105.7
IQR (CV) : 19.6 (35.5)

29019 distinct values

353929
(100%)

0
(0%)

27

HCadjBFPC
[numeric]

Mean (sd) : 0.2 (7.5)
min < med < max:
-57.6 < 0 < 81.1
IQR (CV) : 10 (30.6)

22685 distinct values

353929
(100%)

0
(0%)

28

FFMadjHeight
[numeric]

Mean (sd) : 0 (6.8)
min < med < max:
-26.7 < -0.3 < 38.3
IQR (CV) : 9 (236)

28768 distinct values

353929
(100%)

0
(0%)

29

BFPCadjHeight
[numeric]

Mean (sd) : -0.2 (7.3)
min < med < max:
-35.3 < -0.4 < 36.3
IQR (CV) : 9.7 (-31.4)

30583 distinct values

353929
(100%)

0
(0%)

30

BFPC_trunk
[numeric]

Mean (sd) : 30.8 (7.9)
min < med < max:
2 < 30.7 < 75.6
IQR (CV) : 10.4 (0.3)

629 distinct values

353727
(99.94%)

202
(0.06%)

31

FM_trunk
[numeric]

Mean (sd) : 13.7 (5.1)
min < med < max:
0.6 < 13.1 < 59.9
IQR (CV) : 6.5 (0.4)

454 distinct values

353705
(99.94%)

224
(0.06%)

32

FFM_trunk
[numeric]

Mean (sd) : 30 (6)
min < med < max:
2 < 29 < 57.6
IQR (CV) : 10.1 (0.2)

409 distinct values

353639
(99.92%)

290
(0.08%)

33

Impedance_wb
[integer]

Mean (sd) : 595.9 (88.4)
min < med < max:
51 < 590 < 998
IQR (CV) : 130 (0.1)

746 distinct values

353898
(99.99%)

31
(0.01%)

34

Ethnicity
[factor]

1. European
2. African
3. Asian
4. Chinese
5. Mixed
6. Other

351224 (99.6%)
1 ( 0.0%)
39 ( 0.0%)
1 ( 0.0%)
505 ( 0.1%)
729 ( 0.2%)

352499
(99.6%)

1430
(0.4%)

35

Region
[factor]

1. 1
2. 2
3. 3
4. 4
5. 5
6. 6
7. 7
8. 8
9. 9
10. 11
[ 6 others ]

3 ( 0.0%)
4 ( 0.0%)
18 ( 0.0%)
4 ( 0.0%)
274355 (78.3%)
24252 ( 6.9%)
17519 ( 5.0%)
7724 ( 2.2%)
6 ( 0.0%)
20563 ( 5.9%)
6101 ( 1.7%)

350549
(99.05%)

3380
(0.95%)

36

Income
[factor]

1. -3
2. -1
3. 1
4. 2
5. 3
6. 4
7. 5

33223 ( 9.4%)
12952 ( 3.7%)
64358 (18.2%)
76913 (21.8%)
81537 (23.1%)
65814 (18.6%)
18009 ( 5.1%)

352806
(99.68%)

1123
(0.32%)

37

Birth_weight
[numeric]

Mean (sd) : 3.3 (0.7)
min < med < max:
0.4 < 3.4 < 9
IQR (CV) : 0.7 (0.2)

344 distinct values

199813
(56.46%)

154116
(43.54%)

38

Menarche_age_at
[integer]

Mean (sd) : 12.6 (2.8)
min < med < max:
-3 < 13 < 25
IQR (CV) : 2 (0.2)

23 distinct values

180740
(51.07%)

173189
(48.93%)

39

Contraceptive_ever
[factor]

1. -3
2. -1
3. 0
4. 1

146 ( 0.1%)
166 ( 0.1%)
31313 (17.3%)
149115 (82.5%)

180740
(51.07%)

173189
(48.93%)

40

HRT
[factor]

1. -3
2. -1
3. 0
4. 1

55 ( 0.0%)
372 ( 0.2%)
117249 (64.9%)
63064 (34.9%)

180740
(51.07%)

173189
(48.93%)

41

Breastfed
[factor]

1. -3
2. -1
3. 0
4. 1

130 ( 0.0%)
84861 (24.0%)
77233 (21.8%)
191705 (54.2%)

353929
(100%)

0
(0%)

42

VAT
[integer]

Mean (sd) : 1243.1 (928.5)
min < med < max:
0 < 1056 < 6261
IQR (CV) : 1258 (0.7)

2105 distinct values

3801
(1.07%)

350128
(98.93%)

43

BMC_wb
[integer]

Mean (sd) : 2663.7 (573.5)
min < med < max:
632 < 2628.5 < 4717
IQR (CV) : 890.2 (0.2)

1801 distinct values

3852
(1.09%)

350077
(98.91%)

44

BMD_total
[numeric]

Mean (sd) : 1.2 (0.2)
min < med < max:
0.2 < 1.2 < 1.8
IQR (CV) : 0.2 (0.1)

689 distinct values

3852
(1.09%)

350077
(98.91%)

45

BipolarInitialQ
[factor]

1. 1
2. 2

557 (52.6%)
502 (47.4%)

1059
(0.3%)

352870
(99.7%)

46

Age.At.MHQ
[integer]

Mean (sd) : 64 (7.7)
min < med < max:
46 < 65 < 80
IQR (CV) : 12 (0.1)

35 distinct values

116985
(33.05%)

236944
(66.95%)

47

Migrant.Status
[factor]

1. 0
2. 1

109977 (94.0%)
6988 ( 6.0%)

116965
(33.05%)

236964
(66.95%)

48

Highest.Qualification
[factor]

1. ALevel
2. Degree
3. GCSE
4. NoneOfTheAbove
5. Other

16004 (13.8%)
54645 (46.9%)
32454 (27.9%)
7701 ( 6.6%)
5622 ( 4.8%)

116426
(32.9%)

237503
(67.1%)

49

Smoker
[factor]

1. Current
2. Former
3. Never
4. PreferNotToAnswer

8421 ( 7.2%)
41690 (35.6%)
66660 (57.0%)
214 ( 0.2%)

116985
(33.05%)

236944
(66.95%)

50

Longstanding.Illness
[factor]

1. 0
2. 1

83517 (72.7%)
31356 (27.3%)

114873
(32.46%)

239056
(67.54%)

51

Diabetes
[factor]

1. 0
2. 1

113142 (96.8%)
3708 ( 3.2%)

116850
(33.02%)

237079
(66.98%)

52

Cancer
[factor]

1. 0
2. 1

108882 (93.2%)
7901 ( 6.8%)

116783
(33%)

237146
(67%)

53

CVD
[factor]

1. 0
2. 1

89806 (76.8%)
27063 (23.2%)

116869
(33.02%)

237060
(66.98%)

54

Respiratory
[factor]

1. 0
2. 1

77576 (66.3%)
39340 (33.7%)

116916
(33.03%)

237013
(66.97%)

55

Neuroticism
[integer]

Mean (sd) : 3.8 (3.2)
min < med < max:
0 < 3 < 12
IQR (CV) : 5 (0.8)

13 distinct values

98179
(27.74%)

255750
(72.26%)

56

SRSocPhobia
[factor]

1. 0
2. 1

115562 (98.8%)
1423 ( 1.2%)

116985
(33.05%)

236944
(66.95%)

57

SRSchizophrenia
[factor]

1. 0
2. 1

116879 (99.9%)
106 ( 0.1%)

116985
(33.05%)

236944
(66.95%)

58

SRPsychosisOther
[factor]

1. 0
2. 1

116542 (99.6%)
443 ( 0.4%)

116985
(33.05%)

236944
(66.95%)

59

SRPsychosisAny
[factor]

1. 0
2. 1

116460 (99.6%)
525 ( 0.4%)

116985
(33.05%)

236944
(66.95%)

60

SRPersonalityDisorder
[factor]

1. 0
2. 1

116712 (99.8%)
273 ( 0.2%)

116985
(33.05%)

236944
(66.95%)

61

SROtherPhobia
[factor]

1. 0
2. 1

115482 (98.7%)
1503 ( 1.3%)

116985
(33.05%)

236944
(66.95%)

62

SRPanicAttacks
[factor]

1. 0
2. 1

110759 (94.7%)
6226 ( 5.3%)

116985
(33.05%)

236944
(66.95%)

63

SROCD
[factor]

1. 0
2. 1

116285 (99.4%)
700 ( 0.6%)

116985
(33.05%)

236944
(66.95%)

64

SRManiaBIP
[factor]

1. 0
2. 1

116388 (99.5%)
597 ( 0.5%)

116985
(33.05%)

236944
(66.95%)

65

SRDepression
[factor]

1. 0
2. 1

92762 (79.3%)
24223 (20.7%)

116985
(33.05%)

236944
(66.95%)

66

SRMood
[factor]

1. 0
2. 1

92528 (79.1%)
24457 (20.9%)

116985
(33.05%)

236944
(66.95%)

67

SRBulimiaNervosa
[factor]

1. 0
2. 1

116622 (99.7%)
363 ( 0.3%)

116985
(33.05%)

236944
(66.95%)

68

SRBingeEating
[factor]

1. 0
2. 1

116505 (99.6%)
480 ( 0.4%)

116985
(33.05%)

236944
(66.95%)

69

SRASD
[factor]

1. 0
2. 1

116812 (99.9%)
173 ( 0.1%)

116985
(33.05%)

236944
(66.95%)

70

SRGADandOthers
[factor]

1. 0
2. 1

100836 (86.2%)
16149 (13.8%)

116985
(33.05%)

236944
(66.95%)

71

SRAnorexiaNervosa
[factor]

1. 0
2. 1

116334 (99.4%)
651 ( 0.6%)

116985
(33.05%)

236944
(66.95%)

72

SREatingDisorderAny
[factor]

1. 0
2. 1

115682 (98.9%)
1303 ( 1.1%)

116985
(33.05%)

236944
(66.95%)

73

SRAgoraphobia
[factor]

1. 0
2. 1

116538 (99.6%)
447 ( 0.4%)

116985
(33.05%)

236944
(66.95%)

74

SRAnxietyAny
[factor]

1. 0
2. 1

96648 (82.6%)
20337 (17.4%)

116985
(33.05%)

236944
(66.95%)

75

SRADHD
[factor]

1. 0
2. 1

116896 (99.9%)
89 ( 0.1%)

116985
(33.05%)

236944
(66.95%)

76

SRPNTA
[integer]

Min : 0
Mean : 0
Max : 1

0 : 116600 (99.7%)
1 : 385 ( 0.3%)

116985
(33.05%)

236944
(66.95%)

77

SmithDepression
[factor]

1. 0
2. 1

22858 (73.3%)
8324 (26.7%)

31182
(8.81%)

322747
(91.19%)

78

PHQ9.No.Info
[factor]

1. 0
2. 1

116840 (99.9%)
145 ( 0.1%)

116985
(33.05%)

236944
(66.95%)

79

PHQ9.Screen
[factor]

1. 0
2. 1

87006 (74.4%)
29979 (25.6%)

116985
(33.05%)

236944
(66.95%)

80

PHQ9.Items
[factor]

1. 0
2. 1
3. 2
4. 3
5. 4
6. 5
7. 6
8. 7
9. 8
10. 9

89603 (76.6%)
14367 (12.3%)
5711 ( 4.9%)
2762 ( 2.4%)
1537 ( 1.3%)
1037 ( 0.9%)
709 ( 0.6%)
604 ( 0.5%)
417 ( 0.4%)
238 ( 0.2%)

116985
(33.05%)

236944
(66.95%)

81

PHQ9.Severity
[integer]

Mean (sd) : 2.7 (3.7)
min < med < max:
0 < 2 < 27
IQR (CV) : 4 (1.3)

28 distinct values

116985
(33.05%)

236944
(66.95%)

82

Depressed.Ever
[factor]

1. 0
2. 1

66802 (71.1%)
27198 (28.9%)

94000
(26.56%)

259929
(73.44%)

83

Depressed.Ever.Severe
[factor]

1. 0
2. 1

89812 (95.5%)
4188 ( 4.5%)

94000
(26.56%)

259929
(73.44%)

84

Recurrent.Depression
[factor]

1. 0
2. 1

31416 (67.2%)
15329 (32.8%)

46745
(13.21%)

307184
(86.79%)

85

Single.Depression
[factor]

1. 0
2. 1

35993 (77.0%)
10752 (23.0%)

46745
(13.21%)

307184
(86.79%)

86

SmithBipolar
[factor]

1. 0
2. 1

30834 (98.9%)
348 ( 1.1%)

31182
(8.81%)

322747
(91.19%)

87

GAD7.Severity
[integer]

Mean (sd) : 2.1 (3.3)
min < med < max:
0 < 0 < 21
IQR (CV) : 3 (1.6)

22 distinct values

116922
(33.04%)

237007
(66.96%)

88

GAD.Ever
[factor]

1. 0
2. 1

72566 (90.0%)
8096 (10.0%)

80662
(22.79%)

273267
(77.21%)

89

GAD.Current
[factor]

1. 0
2. 1

78789 (97.7%)
1871 ( 2.3%)

80660
(22.79%)

273269
(77.21%)

90

AUDIT.Score
[integer]

Mean (sd) : 5.1 (4.2)
min < med < max:
0 < 4 < 40
IQR (CV) : 5 (0.8)

41 distinct values

116985
(33.05%)

236944
(66.95%)

91

Alcohol.Use.Disorder
[factor]

1. 0
2. 1

45384 (64.1%)
25395 (35.9%)

70779
(20%)

283150
(80%)

92

Cannabis.Ever
[factor]

1. 0
2. 1

89977 (77.0%)
26837 (23.0%)

116814
(33%)

237115
(67%)

93

Cannabis.Daily
[factor]

1. 0
2. 1

25231 (93.4%)
1777 ( 6.6%)

27008
(7.63%)

326921
(92.37%)

94

Addiction.Ever.SelfReport
[factor]

1. 0
2. 1

108501 (93.8%)
7125 ( 6.2%)

115626
(32.67%)

238303
(67.33%)

95

Addiction.Ever
[factor]

1. 0
2. 1

105450 (93.7%)
7125 ( 6.3%)

112575
(31.81%)

241354
(68.19%)

96

Substance.Addiction.Ever
[factor]

1. 0
2. 1

3321 (46.6%)
3800 (53.4%)

7121
(2.01%)

346808
(97.99%)

97

Alcohol.Dependence.Ever
[factor]

1. 0
2. 1

1900 (72.3%)
729 (27.7%)

2629
(0.74%)

351300
(99.26%)

98

Addiction.Current
[factor]

1. 0
2. 1

2663 (53.8%)
2290 (46.2%)

4953
(1.4%)

348976
(98.6%)

99

Trauma.Childhood
[factor]

1. 0
2. 1

62363 (54.5%)
52133 (45.5%)

114496
(32.35%)

239433
(67.65%)

100

Trauma.Adult
[factor]

1. 0
2. 1

53207 (47.3%)
59308 (52.7%)

112515
(31.79%)

241414
(68.21%)

101

Trauma.Catastrophic
[factor]

1. 0
2. 1

57565 (49.2%)
59395 (50.8%)

116960
(33.05%)

236969
(66.95%)

102

PTSD
[factor]

1. 0
2. 1

109412 (93.9%)
7134 ( 6.1%)

116546
(32.93%)

237383
(67.07%)

103

Self.Harm.Ever
[factor]

1. 0
2. 1

111646 (95.7%)
5007 ( 4.3%)

116653
(32.96%)

237276
(67.04%)

104

Not.Worth.Living
[integer]

Min : 0
Mean : 0.3
Max : 1

0 : 81132 (69.3%)
1 : 35853 (30.6%)

116985
(33.05%)

236944
(66.95%)

105

Self.Harm.Suicide.Attempt
[factor]

1. 0
2. 1

2384 (49.5%)
2434 (50.5%)

4818
(1.36%)

349111
(98.64%)

106

WellbeingScore
[integer]

Mean (sd) : 12.7 (2)
min < med < max:
3 < 13 < 17
IQR (CV) : 3 (0.2)

15 distinct values

113605
(32.1%)

240324
(67.9%)

107

NoSRConditions
[factor]

1. 0
2. 1

39502 (33.8%)
77483 (66.2%)

116985
(33.05%)

236944
(66.95%)

108

MHQ
[factor]

1. 1

116985 (100.0%)

116985
(33.05%)

236944
(66.95%)

109

SRAnyMDX
[factor]

1. 0
2. 1

80538 (68.8%)
36447 (31.2%)

116985
(33.05%)

236944
(66.95%)

110

SRANpure
[factor]

1. 0
2. 1

116339 (99.5%)
646 ( 0.5%)

116985
(33.05%)

236944
(66.95%)

111

SRBNpure
[factor]

1. 0
2. 1

116627 (99.7%)
358 ( 0.3%)

116985
(33.05%)

236944
(66.95%)

112

SRBEDpure
[factor]

1. 0
2. 1

116510 (99.6%)
475 ( 0.4%)

116985
(33.05%)

236944
(66.95%)

113

Corticoids
[factor]

1. 0
2. 1

345620 (97.7%)
8309 ( 2.4%)

353929
(100%)

0
(0%)

114

Antidiabetics
[factor]

1. 0
2. 1

342197 (96.7%)
11732 ( 3.3%)

353929
(100%)

0
(0%)

115

Diuretics
[factor]

1. 0
2. 1

327642 (92.6%)
26287 ( 7.4%)

353929
(100%)

0
(0%)

116

Gonadotropins
[factor]

1. 0
2. 1

353541 (99.9%)
388 ( 0.1%)

353929
(100%)

0
(0%)

117

Growth_Hormone
[factor]

1. 0
2. 1

353811 (100.0%)
118 ( 0.0%)

353929
(100%)

0
(0%)

118

AntiretroviralMed
[factor]

1. 0
2. 1

353661 (99.9%)
268 ( 0.1%)

353929
(100%)

0
(0%)

119

HRT_Contraceptives
[factor]

1. 0
2. 1

339072 (95.8%)
14857 ( 4.2%)

353929
(100%)

0
(0%)

120

Antiosteoporotics
[factor]

1. 0
2. 1

348232 (98.4%)
5697 ( 1.6%)

353929
(100%)

0
(0%)

121

Testosterone
[factor]

1. 0
2. 1

353534 (99.9%)
395 ( 0.1%)

353929
(100%)

0
(0%)

122

Thyreostatics
[factor]

1. 0
2. 1

335216 (94.7%)
18713 ( 5.3%)

353929
(100%)

0
(0%)

123

AntitubercularAntileproticMed
[factor]

1. 0
2. 1

353870 (100.0%)
59 ( 0.0%)

353929
(100%)

0
(0%)

124

Antidepressants
[factor]

1. 0
2. 1

329351 (93.1%)
24578 ( 6.9%)

353929
(100%)

0
(0%)

125

Antineoplastics
[factor]

1. 0
2. 1

349038 (98.6%)
4891 ( 1.4%)

353929
(100%)

0
(0%)

126

Antipsychotics
[factor]

1. 0
2. 1

352026 (99.5%)
1903 ( 0.5%)

353929
(100%)

0
(0%)

127

Anxiolytics
[factor]

1. 0
2. 1

350228 (99.0%)
3701 ( 1.1%)

353929
(100%)

0
(0%)

128

MetabolicMed
[factor]

1. 0
2. 1

259896 (73.4%)
94033 (26.6%)

353929
(100%)

0
(0%)

129

PsychotropicMed
[factor]

1. 0
2. 1

318765 (90.1%)
35164 ( 9.9%)

353929
(100%)

0
(0%)

130

DxCancerAny
[factor]

1. 0
2. 1

302293 (85.4%)
51636 (14.6%)

353929
(100%)

0
(0%)

131

DxBodyCompAny
[factor]

1. 0
2. 1

237204 (67.0%)
116725 (33.0%)

353929
(100%)

0
(0%)

132

CancerRegister
[factor]

1. 0
2. 1

307896 (87.0%)
46033 (13.0%)

353929
(100%)

0
(0%)

133

DxConnectiveTissue
[factor]

1. 0
2. 1

351616 (99.4%)
2313 ( 0.7%)

353929
(100%)

0
(0%)

134

DxDiabetes
[factor]

1. 0
2. 1

337737 (95.4%)
16192 ( 4.6%)

353929
(100%)

0
(0%)

135

DxEndocrine
[factor]

1. 0
2. 1

351906 (99.4%)
2023 ( 0.6%)

353929
(100%)

0
(0%)

136

DxGlucose
[factor]

1. 0
2. 1

353188 (99.8%)
741 ( 0.2%)

353929
(100%)

0
(0%)

137

DxHIV
[factor]

1. 0
2. 1

353806 (100.0%)
123 ( 0.0%)

353929
(100%)

0
(0%)

138

DxIBD
[factor]

1. 0
2. 1

338439 (95.6%)
15490 ( 4.4%)

353929
(100%)

0
(0%)

139

DxIBS
[factor]

1. 0
2. 1

349745 (98.8%)
4184 ( 1.2%)

353929
(100%)

0
(0%)

140

DxLiver
[factor]

1. 0
2. 1

349865 (98.9%)
4064 ( 1.1%)

353929
(100%)

0
(0%)

141

DxPsy
[factor]

1. 0
2. 1

327808 (92.6%)
26121 ( 7.4%)

353929
(100%)

0
(0%)

142

DxMetabolic
[factor]

1. 0
2. 1

317904 (89.8%)
36025 (10.2%)

353929
(100%)

0
(0%)

143

DxMuscles
[factor]

1. 0
2. 1

353033 (99.8%)
896 ( 0.2%)

353929
(100%)

0
(0%)

144

DxPancreatitis
[factor]

1. 0
2. 1

353515 (99.9%)
414 ( 0.1%)

353929
(100%)

0
(0%)

145

DxThyroid
[factor]

1. 0
2. 1

340750 (96.3%)
13179 ( 3.7%)

353929
(100%)

0
(0%)

146

DxTuberculosis
[factor]

1. 0
2. 1

353815 (100.0%)
114 ( 0.0%)

353929
(100%)

0
(0%)

147

F500
[factor]

1. 0
2. 1

353901 (100.0%)
28 ( 0.0%)

353929
(100%)

0
(0%)

148

F501
[factor]

1. 0
2. 1

353927 (100.0%)
2 ( 0.0%)

353929
(100%)

0
(0%)

149

F502
[factor]

1. 0
2. 1

353914 (100.0%)
15 ( 0.0%)

353929
(100%)

0
(0%)

150

F505
[factor]

1. 0
2. 1

353928 (100.0%)
1 ( 0.0%)

353929
(100%)

0
(0%)

151

F508
[factor]

1. 0
2. 1

353928 (100.0%)
1 ( 0.0%)

353929
(100%)

0
(0%)

152

F509
[factor]

1. 0
2. 1

353914 (100.0%)
15 ( 0.0%)

353929
(100%)

0
(0%)

153

SRAnyIllnessBC
[factor]

1. 0
2. 1

267243 (75.5%)
86686 (24.5%)

353929
(100%)

0
(0%)

154

SRAnyIllnessBC.0
[factor]

1. 0
2. 1

270118 (76.3%)
83811 (23.7%)

353929
(100%)

0
(0%)

155

SRAnyIllnessBC.1
[factor]

1. 0
2. 1

350782 (99.1%)
3147 ( 0.9%)

353929
(100%)

0
(0%)

156

SRAnyIllnessBC.2
[factor]

1. 0
2. 1

351284 (99.2%)
2645 ( 0.8%)

353929
(100%)

0
(0%)

157

SRAnyPsyIllness
[factor]

1. 0
2. 1

327619 (92.6%)
26310 ( 7.4%)

353929
(100%)

0
(0%)

158

SRAnyNeuroIllness
[factor]

1. 0
2. 1

345069 (97.5%)
8860 ( 2.5%)

353929
(100%)

0
(0%)

159

SRAnyCancerBC
[factor]

1. 0
2. 1

325632 (92.0%)
28297 ( 8.0%)

353929
(100%)

0
(0%)

160

SRAnyCancer
[factor]

1. 0
2. 1

324056 (91.6%)
29873 ( 8.4%)

353929
(100%)

0
(0%)

161

SRAnyCancerNeuro
[factor]

1. 0
2. 1

353740 (100.0%)
189 ( 0.1%)

353929
(100%)

0
(0%)

162

SRHeartAttack
[factor]

1. 1

8119 (100.0%)

8119
(2.29%)

345810
(97.71%)

163

SRAngina
[factor]

1. 1

11004 (100.0%)

11004
(3.11%)

342925
(96.89%)

164

SRStroke
[factor]

1. 1

5263 (100.0%)

5263
(1.49%)

348666
(98.51%)

165

SRHighBloodPressure
[factor]

1. 1

94520 (100.0%)

94520
(26.71%)

259409
(73.29%)

166

ICD.autoimmune
[integer]

Min : 0
Mean : 0.1
Max : 1

0 : 326463 (92.2%)
1 : 27466 ( 7.8%)

353929
(100%)

0
(0%)

167

ICD.autoinflammatory
[integer]

Min : 0
Mean : 0.3
Max : 1

0 : 252761 (71.4%)
1 : 101168 (28.6%)

353929
(100%)

0
(0%)

168

ICD.immunodeficiency
[integer]

Min : 0
Mean : 0
Max : 1

0 : 353555 (99.9%)
1 : 374 ( 0.1%)

353929
(100%)

0
(0%)

169

ICD.memory
[integer]

Min : 0
Mean : 0
Max : 1

0 : 350791 (99.1%)
1 : 3138 ( 0.9%)

353929
(100%)

0
(0%)

170

ICD.metabolic
[integer]

Min : 0
Mean : 0.2
Max : 1

0 : 298681 (84.4%)
1 : 55248 (15.6%)

353929
(100%)

0
(0%)

171

ICD.psychiatric
[integer]

Min : 0
Mean : 0.1
Max : 1

0 : 327808 (92.6%)
1 : 26121 ( 7.4%)

353929
(100%)

0
(0%)

172

ICD.immunodysregulation
[integer]

Min : 0
Mean : 0.3
Max : 1

0 : 248024 (70.1%)
1 : 105905 (29.9%)

353929
(100%)

0
(0%)

173

SRAnorexiaNervosa.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 116334 (99.4%)
1 : 651 ( 0.6%)

116985
(33.05%)

236944
(66.95%)

174

F500.numeric
[integer]

Min : 0
Mean : 0
Max : 1

0 : 353901 (100.0%)
1 : 28 ( 0.0%)

353929
(100%)

0
(0%)

175

F501.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 353927 (100.0%)
1 : 2 ( 0.0%)

353929
(100%)

0
(0%)

176

F502.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 353914 (100.0%)
1 : 15 ( 0.0%)

353929
(100%)

0
(0%)

177

F505.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 353928 (100.0%)
1 : 1 ( 0.0%)

353929
(100%)

0
(0%)

178

F508.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 353928 (100.0%)
1 : 1 ( 0.0%)

353929
(100%)

0
(0%)

179

F509.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 353914 (100.0%)
1 : 15 ( 0.0%)

353929
(100%)

0
(0%)

180

SRBulimiaNervosa.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 116622 (99.7%)
1 : 363 ( 0.3%)

116985
(33.05%)

236944
(66.95%)

181

SRBingeEating.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 116505 (99.6%)
1 : 480 ( 0.4%)

116985
(33.05%)

236944
(66.95%)

182

AN.count
[factor]

1. 0
2. 1

116332 (99.4%)
671 ( 0.6%)

117003
(33.06%)

236926
(66.94%)

183

BN.count
[factor]

1. 0
2. 1

116622 (99.7%)
372 ( 0.3%)

116994
(33.06%)

236935
(66.94%)

184

BED.count
[factor]

1. 0
2. 1

116505 (99.6%)
480 ( 0.4%)

116985
(33.05%)

236944
(66.95%)

185

EDNOS.count
[factor]

1. 0
2. 1

353914 (100.0%)
15 ( 0.0%)

353929
(100%)

0
(0%)

186

PUR.count
[factor]

1. 0
2. 1

353928 (100.0%)
1 ( 0.0%)

353929
(100%)

0
(0%)

187

Pica.count
[factor]

1. 0
2. 1

353928 (100.0%)
1 ( 0.0%)

353929
(100%)

0
(0%)

188

AN.count.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 116332 (99.4%)
1 : 671 ( 0.6%)

117003
(33.06%)

236926
(66.94%)

189

BN.count.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 116622 (99.7%)
1 : 372 ( 0.3%)

116994
(33.06%)

236935
(66.94%)

190

BED.count.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 116505 (99.6%)
1 : 480 ( 0.4%)

116985
(33.05%)

236944
(66.95%)

191

EDNOS.count.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 353914 (100.0%)
1 : 15 ( 0.0%)

353929
(100%)

0
(0%)

192

PUR.count.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 353928 (100.0%)
1 : 1 ( 0.0%)

353929
(100%)

0
(0%)

193

Pica.count.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 353928 (100.0%)
1 : 1 ( 0.0%)

353929
(100%)

0
(0%)

194

AN
[factor]

1. 0
2. 1

115679 (99.4%)
671 ( 0.6%)

116350
(32.87%)

237579
(67.13%)

195

AN.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 115679 (99.4%)
1 : 671 ( 0.6%)

116350
(32.87%)

237579
(67.13%)

196

BN
[factor]

1. 0
2. 1

115679 (99.7%)
372 ( 0.3%)

116051
(32.79%)

237878
(67.21%)

197

BN.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 115679 (99.7%)
1 : 372 ( 0.3%)

116051
(32.79%)

237878
(67.21%)

198

BED
[factor]

1. 0
2. 1

115679 (99.6%)
480 ( 0.4%)

116159
(32.82%)

237770
(67.18%)

199

BED.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 115679 (99.6%)
1 : 480 ( 0.4%)

116159
(32.82%)

237770
(67.18%)

200

ANpure.count
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 116487 (99.5%)
1 : 536 ( 0.5%)

117023
(33.06%)

236906
(66.94%)

201

ANpure
[factor]

1. 0
2. 1

115679 (99.5%)
536 ( 0.5%)

116215
(32.84%)

237714
(67.16%)

202

ANpure.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 115679 (99.5%)
1 : 536 ( 0.5%)

116215
(32.84%)

237714
(67.16%)

203

BNpure.count
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 116804 (99.8%)
1 : 219 ( 0.2%)

117023
(33.06%)

236906
(66.94%)

204

BNpure
[factor]

1. 0
2. 1

115679 (99.8%)
219 ( 0.2%)

115898
(32.75%)

238031
(67.25%)

205

BNpure.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 115679 (99.8%)
1 : 219 ( 0.2%)

115898
(32.75%)

238031
(67.25%)

206

BEDpure.count
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 116623 (99.7%)
1 : 400 ( 0.3%)

117023
(33.06%)

236906
(66.94%)

207

BEDpure
[factor]

1. 0
2. 1

115679 (99.7%)
400 ( 0.3%)

116079
(32.8%)

237850
(67.2%)

208

BEDpure.numeric
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 115679 (99.7%)
1 : 400 ( 0.3%)

116079
(32.8%)

237850
(67.2%)

209

ED
[factor]

1. 0
2. 1

115679 (98.9%)
1343 ( 1.1%)

117022
(33.06%)

236907
(66.94%)

210

ED.control
[numeric]

Min : 0
Mean : 0
Max : 1

0 : 115681 (98.9%)
1 : 1330 ( 1.1%)

117011
(33.06%)

236918
(66.94%)

211

PC1
[numeric]

Mean (sd) : 0 (0)
min < med < max:
0 < 0 < 0.2
IQR (CV) : 0 (582.5)

350118 distinct values

353929
(100%)

0
(0%)

212

PC2
[numeric]

Mean (sd) : 0 (0)
min < med < max:
-0.1 < 0 < 0
IQR (CV) : 0 (-370.2)

347369 distinct values

353929
(100%)

0
(0%)

213

PC3
[numeric]

Mean (sd) : 0 (0)
min < med < max:
-0.1 < 0 < 0.2
IQR (CV) : 0 (519.5)

350875 distinct values

353929
(100%)

0
(0%)

214

PC4
[numeric]

Mean (sd) : 0 (0)
min < med < max:
-0.1 < 0 < 0.1
IQR (CV) : 0 (490.3)

351213 distinct values

353929
(100%)

0
(0%)

215

PC5
[numeric]

Mean (sd) : 0 (0)
min < med < max:
0 < 0 < 0.1
IQR (CV) : 0 (-849.9)

351068 distinct values

353929
(100%)

0
(0%)

216

PC6
[numeric]

Mean (sd) : 0 (0)
min < med < max:
0 < 0 < 0.1
IQR (CV) : 0 (-316.5)

350560 distinct values

353929
(100%)

0
(0%)

217

PC7
[numeric]

Mean (sd) : 0 (0)
min < med < max:
-0.1 < 0 < 0
IQR (CV) : 0 (6723.8)

350903 distinct values

353929
(100%)

0
(0%)

218

PC8
[numeric]

Mean (sd) : 0 (0)
min < med < max:
-0.1 < 0 < 0
IQR (CV) : 0 (14403.6)

351009 distinct values

353929
(100%)

0
(0%)

219

PC9
[numeric]

Mean (sd) : 0 (0)
min < med < max:
-0.1 < 0 < 0.1
IQR (CV) : 0 (5096.2)

351079 distinct values

353929
(100%)

0
(0%)

220

PC10
[numeric]

Mean (sd) : 0 (0)
min < med < max:
0 < 0 < 0
IQR (CV) : 0 (-2280.7)

350960 distinct values

353929
(100%)

0
(0%)

221

Array
[factor]

1. UKBB
2. UKBL

315486 (89.1%)
38443 (10.9%)

353929
(100%)

0
(0%)

222

Batch
[factor]

1. Batch_b001
2. Batch_b002
3. Batch_b003
4. Batch_b004
5. Batch_b005
6. Batch_b006
7. Batch_b007
8. Batch_b008
9. Batch_b009
10. Batch_b010
[ 96 others ]

3381 ( 1.0%)
3373 ( 1.0%)
3362 ( 0.9%)
3364 ( 1.0%)
3376 ( 1.0%)
3421 ( 1.0%)
3355 ( 0.9%)
3432 ( 1.0%)
3385 ( 1.0%)
3314 ( 0.9%)
320166 (90.5%)

353929
(100%)

0
(0%)

223

Menopause_bin
[factor]

1. 0
2. 1

228955 (64.7%)
124974 (35.3%)

353929
(100%)

0
(0%)

Export data

write.table(complete,
            file = paste("data/BCpheno_complete",date,".txt", sep =""),
            quote = T,
            col.names = T,
            row.names = F)

write.table(complete[,1:2],
            file = paste("data/BCpheno_complete_IDs",date,".txt", sep =""),
            quote = T,
            col.names = T,
            row.names = F)

Subset of healthy individuals

without medication, cancer, somatic disease influencing BC, psychiatric disorder, did not answer smoking or alcohol

healthy <- complete %>%
  filter(Tobacco_current != -3 &
           Alcohol_frequency != -3 &
    (is.na(SRAnyMDX) | SRAnyMDX == 0) &
           MetabolicMed == 0 &
           DxCancerAny == 0 &
           DxBodyCompAny == 0 &
           SRAnyIllnessBC == 0 &
           SRAnyCancerBC == 0)

#healthy <- complete %>%
#  filter((is.na(NoSRConditions) | NoSRConditions == 1) &
#           MetabolicMed == 0 &
#           DxCancerAny == 0 &
#           DxBodyCompAny == 0 &
#           SRAnyIllnessBC == 0 &
#           SRAnyCancer == 0)

dim(healthy)
[1] 155938    224
# 155961    321 OLD
# 191162 old without MHQ

## 173374 new excluding participants who endorsed a psychiatric disorder via MHQ

## 155961 new excluding, ICD, cancer register, self-report, MHQ

Female and male subset

### Female
healthy_female <- subset(healthy, Gender == "Female")
dim(healthy_female)
[1] 70686   224
# 70700   321
### Male
healthy_male <- subset(healthy, Gender == "Male")
dim(healthy_male)
[1] 85252   224
# 85261   321

Descriptives: Healthy (150K)

summarytools::dfSummary(healthy[,-1],
                        bootstrap.css     = FALSE,       # Already part of the theme so no need for it
                        plain.ascii       = FALSE,       # One of the essential settings
                        style             = "grid", # Idem.
                        dfSummary.silent  = TRUE,        # Suppresses messages about temporary files
                        footnote          = NA,          # Keeping the results minimalistic
                        subtitle.emphasis = FALSE,
                        graph.magnif = 0.75,
                        tmp.img.dir = "/tmp")

Data Frame Summary

healthy

Dimensions: 155938 x 223
Duplicates: 0

No Variable Stats / Values Freqs (% of Valid) Graph Valid Missing

1

Passed_GQC
[factor]

1. 0
2. Male
3. Female

0 ( 0.0%)
85252 (54.7%)
70686 (45.3%)

155938
(100%)

0
(0%)

2

EuropeanGenetic
[numeric]

1 distinct value

1 : 155938 (100.0%)

155938
(100%)

0
(0%)

3

Gender
[factor]

1. Female
2. Male

70686 (45.3%)
85252 (54.7%)

155938
(100%)

0
(0%)

4

Age
[integer]

Mean (sd) : 54.9 (8.1)
min < med < max:
38 < 55 < 73
IQR (CV) : 14 (0.1)

36 distinct values

155938
(100%)

0
(0%)

5

Height
[numeric]

Mean (sd) : 170.4 (9.3)
min < med < max:
133 < 170 < 209
IQR (CV) : 14 (0.1)

486 distinct values

155938
(100%)

0
(0%)

6

Weight
[numeric]

Mean (sd) : 78.1 (15.1)
min < med < max:
34 < 77 < 196.1
IQR (CV) : 20.3 (0.2)

1120 distinct values

155938
(100%)

0
(0%)

7

BMI
[numeric]

Mean (sd) : 26.8 (4.2)
min < med < max:
15.1 < 26.3 < 64.8
IQR (CV) : 5.2 (0.2)

392 distinct values

155938
(100%)

0
(0%)

8

WC
[numeric]

Mean (sd) : 89.4 (12.6)
min < med < max:
20 < 89 < 182
IQR (CV) : 18 (0.1)

577 distinct values

155938
(100%)

0
(0%)

9

HC
[numeric]

Mean (sd) : 102.5 (8.1)
min < med < max:
39 < 102 < 175
IQR (CV) : 10 (0.1)

439 distinct values

155938
(100%)

0
(0%)

10

BFPC
[numeric]

Mean (sd) : 29.3 (8.2)
min < med < max:
6.5 < 28.6 < 69.3
IQR (CV) : 11.6 (0.3)

516 distinct values

155938
(100%)

0
(0%)

11

FM
[numeric]

Mean (sd) : 23.1 (8.5)
min < med < max:
5 < 21.9 < 109.8
IQR (CV) : 10.1 (0.4)

696 distinct values

155938
(100%)

0
(0%)

12

FFM
[numeric]

Mean (sd) : 55.1 (11.6)
min < med < max:
27.3 < 55.1 < 99.4
IQR (CV) : 19.8 (0.2)

666 distinct values

155938
(100%)

0
(0%)

13

SES
[numeric]

Mean (sd) : -1.7 (2.9)
min < med < max:
-6.3 < -2.4 < 11
IQR (CV) : 3.7 (-1.7)

38382 distinct values

155938
(100%)

0
(0%)

14

Tobacco_current_orig
[integer]

Mean (sd) : 0.1 (0.4)
min < med < max:
0 < 0 < 2
IQR (CV) : 0 (3.4)

0 : 142121 (91.1%)
1 : 9462 ( 6.1%)
2 : 4355 ( 2.8%)

155938
(100%)

0
(0%)

15

Alcohol_frequency_orig
[integer]

Mean (sd) : 2.7 (1.4)
min < med < max:
1 < 3 < 6
IQR (CV) : 1 (0.5)

1 : 34825 (22.3%)
2 : 41946 (26.9%)
3 : 43052 (27.6%)
4 : 16310 (10.5%)
5 : 12950 ( 8.3%)
6 : 6855 ( 4.4%)

155938
(100%)

0
(0%)

16

Centre
[factor]

1. 10003
2. 11001
3. 11002
4. 11003
5. 11004
6. 11005
7. 11006
8. 11007
9. 11008
10. 11009
[ 12 others ]

122 ( 0.1%)
3748 ( 2.4%)
4699 ( 3.0%)
5631 ( 3.6%)
6177 ( 4.0%)
6171 ( 4.0%)
6160 ( 4.0%)
10977 ( 7.0%)
8351 ( 5.4%)
10763 ( 6.9%)
93139 (59.7%)

155938
(100%)

0
(0%)

17

WHR
[numeric]

Mean (sd) : 0.9 (0.1)
min < med < max:
0.2 < 0.9 < 2.1
IQR (CV) : 0.1 (0.1)

5166 distinct values

155938
(100%)

0
(0%)

18

Menopause_new
[factor]

1. 0
2. 1
3. 2
4. 3
5. 4
6. 5
7. 6

85252 (54.7%)
0 ( 0.0%)
44449 (28.5%)
0 ( 0.0%)
3032 ( 1.9%)
0 ( 0.0%)
23205 (14.9%)

155938
(100%)

0
(0%)

19

Pregnancy_no_NA
[factor]

1. 0
2. 1
3. 2
4. 3
5. 4

85252 (54.7%)
0 ( 0.0%)
70686 (45.3%)
0 ( 0.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

20

Alcohol_frequency
[ordered, factor]

1. Never
2. Special occasions only
3. One to three times a mont
4. Once or twice a week
5. Three or four times a wee
6. Daily or almost daily

6855 ( 4.4%)
12950 ( 8.3%)
16310 (10.5%)
43052 (27.6%)
41946 (26.9%)
34825 (22.3%)

155938
(100%)

0
(0%)

21

Tobacco_current
[ordered, factor]

1. No
2. Only occasionally
3. Yes, on most or all days

142121 (91.1%)
9462 ( 6.1%)
4355 ( 2.8%)

155938
(100%)

0
(0%)

22

WHRadjBMI
[numeric]

Mean (sd) : 0 (0.1)
min < med < max:
-0.6 < 0 < 1.3
IQR (CV) : 0.1 (21.1)

65734 distinct values

155938
(100%)

0
(0%)

23

WCadjBMI
[numeric]

Mean (sd) : 0.5 (7.6)
min < med < max:
-56.9 < 0.9 < 73.3
IQR (CV) : 11.2 (14.7)

11849 distinct values

155938
(100%)

0
(0%)

24

HCadjBMI
[numeric]

Mean (sd) : 0.2 (4.4)
min < med < max:
-57.7 < 0.2 < 77.6
IQR (CV) : 5.7 (27.1)

9254 distinct values

155938
(100%)

0
(0%)

25

WHRadjBFPC
[numeric]

Mean (sd) : 0 (0.1)
min < med < max:
-0.7 < 0 < 1.2
IQR (CV) : 0.1 (-20.3)

96394 distinct values

155938
(100%)

0
(0%)

26

WCadjBFPC
[numeric]

Mean (sd) : -0.1 (12.5)
min < med < max:
-63.8 < 0.6 < 91.3
IQR (CV) : 18.8 (-197.5)

21070 distinct values

155938
(100%)

0
(0%)

27

HCadjBFPC
[numeric]

Mean (sd) : 0.5 (7)
min < med < max:
-57.6 < 0.5 < 81.1
IQR (CV) : 9.8 (15.5)

16122 distinct values

155938
(100%)

0
(0%)

28

FFMadjHeight
[numeric]

Mean (sd) : -0.1 (6.6)
min < med < max:
-26.7 < -0.3 < 36.7
IQR (CV) : 8.7 (-83.7)

19878 distinct values

155938
(100%)

0
(0%)

29

BFPCadjHeight
[numeric]

Mean (sd) : -1.2 (7)
min < med < max:
-35.3 < -1.4 < 36.3
IQR (CV) : 9.3 (-5.7)

21482 distinct values

155938
(100%)

0
(0%)

30

BFPC_trunk
[numeric]

Mean (sd) : 29.5 (7.6)
min < med < max:
2 < 29.4 < 75
IQR (CV) : 9.9 (0.3)

570 distinct values

155841
(99.94%)

97
(0.06%)

31

FM_trunk
[numeric]

Mean (sd) : 13 (4.8)
min < med < max:
0.6 < 12.6 < 59.9
IQR (CV) : 6.1 (0.4)

399 distinct values

155833
(99.93%)

105
(0.07%)

32

FFM_trunk
[numeric]

Mean (sd) : 30.6 (6.1)
min < med < max:
3 < 30.6 < 57.6
IQR (CV) : 10.3 (0.2)

375 distinct values

155803
(99.91%)

135
(0.09%)

33

Impedance_wb
[integer]

Mean (sd) : 592.2 (85.7)
min < med < max:
51 < 583 < 998
IQR (CV) : 125 (0.1)

644 distinct values

155929
(99.99%)

9
(0.01%)

34

Ethnicity
[factor]

1. European
2. African
3. Asian
4. Chinese
5. Mixed
6. Other

154868 (99.7%)
0 ( 0.0%)
24 ( 0.0%)
0 ( 0.0%)
214 ( 0.1%)
279 ( 0.2%)

155385
(99.65%)

553
(0.35%)

35

Region
[factor]

1. 1
2. 2
3. 3
4. 4
5. 5
6. 6
7. 7
8. 8
9. 9
10. 11
[ 6 others ]

0 ( 0.0%)
3 ( 0.0%)
10 ( 0.0%)
1 ( 0.0%)
119469 (77.4%)
10730 ( 7.0%)
8236 ( 5.3%)
3687 ( 2.4%)
2 ( 0.0%)
9452 ( 6.1%)
2750 ( 1.8%)

154340
(98.98%)

1598
(1.02%)

36

Income
[factor]

1. -3
2. -1
3. 1
4. 2
5. 3
6. 4
7. 5

13878 ( 8.9%)
4430 ( 2.9%)
20818 (13.4%)
31528 (20.2%)
39319 (25.3%)
35458 (22.8%)
10233 ( 6.6%)

155664
(99.82%)

274
(0.18%)

37

Birth_weight
[numeric]

Mean (sd) : 3.4 (0.6)
min < med < max:
0.4 < 3.4 < 9
IQR (CV) : 0.7 (0.2)

289 distinct values

90603
(58.1%)

65335
(41.9%)

38

Menarche_age_at
[integer]

Mean (sd) : 12.6 (2.8)
min < med < max:
-3 < 13 < 24
IQR (CV) : 2 (0.2)

22 distinct values

70686
(45.33%)

85252
(54.67%)

39

Contraceptive_ever
[factor]

1. -3
2. -1
3. 0
4. 1

63 ( 0.1%)
50 ( 0.1%)
11229 (15.9%)
59344 (84.0%)

70686
(45.33%)

85252
(54.67%)

40

HRT
[factor]

1. -3
2. -1
3. 0
4. 1

21 ( 0.0%)
97 ( 0.1%)
53027 (75.0%)
17541 (24.8%)

70686
(45.33%)

85252
(54.67%)

41

Breastfed
[factor]

1. -3
2. -1
3. 0
4. 1

46 ( 0.0%)
35042 (22.5%)
35694 (22.9%)
85156 (54.6%)

155938
(100%)

0
(0%)

42

VAT
[integer]

Mean (sd) : 1215.2 (877.1)
min < med < max:
8 < 1067.5 < 5135
IQR (CV) : 1210.2 (0.7)

1148 distinct values

1520
(0.97%)

154418
(99.03%)

43

BMC_wb
[integer]

Mean (sd) : 2745.7 (580.4)
min < med < max:
632 < 2748 < 4508
IQR (CV) : 884.2 (0.2)

1087 distinct values

1536
(0.99%)

154402
(99.01%)

44

BMD_total
[numeric]

Mean (sd) : 1.2 (0.2)
min < med < max:
0.8 < 1.2 < 1.8
IQR (CV) : 0.2 (0.1)

562 distinct values

1536
(0.99%)

154402
(99.01%)

45

BipolarInitialQ
[factor]

1. 1
2. 2

62 (26.2%)
175 (73.8%)

237
(0.15%)

155701
(99.85%)

46

Age.At.MHQ
[integer]

Mean (sd) : 63 (7.7)
min < med < max:
46 < 64 < 80
IQR (CV) : 12 (0.1)

35 distinct values

47666
(30.57%)

108272
(69.43%)

47

Migrant.Status
[factor]

1. 0
2. 1

44729 (93.9%)
2927 ( 6.1%)

47656
(30.56%)

108282
(69.44%)

48

Highest.Qualification
[factor]

1. ALevel
2. Degree
3. GCSE
4. NoneOfTheAbove
5. Other

6507 (13.7%)
23639 (49.8%)
12733 (26.8%)
2558 ( 5.4%)
2040 ( 4.3%)

47477
(30.45%)

108461
(69.55%)

49

Smoker
[factor]

1. Current
2. Former
3. Never
4. PreferNotToAnswer

2923 ( 6.1%)
15401 (32.3%)
29278 (61.4%)
64 ( 0.1%)

47666
(30.57%)

108272
(69.43%)

50

Longstanding.Illness
[factor]

1. 0
2. 1

40506 (86.1%)
6566 (14.0%)

47072
(30.19%)

108866
(69.81%)

51

Diabetes
[factor]

1. 0
2. 1

47573 (99.9%)
50 ( 0.1%)

47623
(30.54%)

108315
(69.46%)

52

Cancer
[factor]

1. 0
2. 1

47504 (99.7%)
128 ( 0.3%)

47632
(30.55%)

108306
(69.45%)

53

CVD
[factor]

1. 0
2. 1

40499 (85.0%)
7126 (15.0%)

47625
(30.54%)

108313
(69.46%)

54

Respiratory
[factor]

1. 0
2. 1

33189 (69.7%)
14460 (30.3%)

47649
(30.56%)

108289
(69.44%)

55

Neuroticism
[integer]

Mean (sd) : 3 (2.7)
min < med < max:
0 < 2 < 12
IQR (CV) : 4 (0.9)

13 distinct values

40810
(26.17%)

115128
(73.83%)

56

SRSocPhobia
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

57

SRSchizophrenia
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

58

SRPsychosisOther
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

59

SRPsychosisAny
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

60

SRPersonalityDisorder
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

61

SROtherPhobia
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

62

SRPanicAttacks
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

63

SROCD
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

64

SRManiaBIP
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

65

SRDepression
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

66

SRMood
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

67

SRBulimiaNervosa
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

68

SRBingeEating
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

69

SRASD
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

70

SRGADandOthers
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

71

SRAnorexiaNervosa
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

72

SREatingDisorderAny
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

73

SRAgoraphobia
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

74

SRAnxietyAny
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

75

SRADHD
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

76

SRPNTA
[integer]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

77

SmithDepression
[factor]

1. 0
2. 1

11606 (92.4%)
953 ( 7.6%)

12559
(8.05%)

143379
(91.95%)

78

PHQ9.No.Info
[factor]

1. 0
2. 1

47635 (99.9%)
31 ( 0.1%)

47666
(30.57%)

108272
(69.43%)

79

PHQ9.Screen
[factor]

1. 0
2. 1

38732 (81.3%)
8934 (18.7%)

47666
(30.57%)

108272
(69.43%)

80

PHQ9.Items
[factor]

1. 0
2. 1
3. 2
4. 3
5. 4
6. 5
7. 6
8. 7
9. 8
10. 9

40089 (84.1%)
4878 (10.2%)
1475 ( 3.1%)
543 ( 1.1%)
289 ( 0.6%)
167 ( 0.4%)
99 ( 0.2%)
74 ( 0.2%)
36 ( 0.1%)
16 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

81

PHQ9.Severity
[integer]

Mean (sd) : 1.9 (2.7)
min < med < max:
0 < 1 < 27
IQR (CV) : 3 (1.4)

27 distinct values

47666
(30.57%)

108272
(69.43%)

82

Depressed.Ever
[factor]

1. 0
2. 1

36745 (88.4%)
4829 (11.6%)

41574
(26.66%)

114364
(73.34%)

83

Depressed.Ever.Severe
[factor]

1. 0
2. 1

41308 (99.4%)
266 ( 0.6%)

41574
(26.66%)

114364
(73.34%)

84

Recurrent.Depression
[factor]

1. 0
2. 1

13220 (86.0%)
2145 (14.0%)

15365
(9.85%)

140573
(90.15%)

85

Single.Depression
[factor]

1. 0
2. 1

12751 (83.0%)
2614 (17.0%)

15365
(9.85%)

140573
(90.15%)

86

SmithBipolar
[factor]

1. 0
2. 1

12520 (99.7%)
39 ( 0.3%)

12559
(8.05%)

143379
(91.95%)

87

GAD7.Severity
[integer]

Mean (sd) : 1.4 (2.5)
min < med < max:
0 < 0 < 21
IQR (CV) : 2 (1.7)

22 distinct values

47647
(30.56%)

108291
(69.44%)

88

GAD.Ever
[factor]

1. 0
2. 1

36416 (97.8%)
837 ( 2.2%)

37253
(23.89%)

118685
(76.11%)

89

GAD.Current
[factor]

1. 0
2. 1

37084 (99.6%)
168 ( 0.4%)

37252
(23.89%)

118686
(76.11%)

90

AUDIT.Score
[integer]

Mean (sd) : 5.3 (4)
min < med < max:
0 < 4 < 33
IQR (CV) : 4 (0.7)

34 distinct values

47666
(30.57%)

108272
(69.43%)

91

Alcohol.Use.Disorder
[factor]

1. 0
2. 1

16764 (60.1%)
11152 (40.0%)

27916
(17.9%)

128022
(82.1%)

92

Cannabis.Ever
[factor]

1. 0
2. 1

36380 (76.4%)
11220 (23.6%)

47600
(30.52%)

108338
(69.48%)

93

Cannabis.Daily
[factor]

1. 0
2. 1

10660 (94.5%)
626 ( 5.5%)

11286
(7.24%)

144652
(92.76%)

94

Addiction.Ever.SelfReport
[factor]

1. 0
2. 1

45330 (96.0%)
1915 ( 4.0%)

47245
(30.3%)

108693
(69.7%)

95

Addiction.Ever
[factor]

1. 0
2. 1

44141 (95.8%)
1915 ( 4.2%)

46056
(29.53%)

109882
(70.47%)

96

Substance.Addiction.Ever
[factor]

1. 0
2. 1

1107 (57.8%)
807 (42.2%)

1914
(1.23%)

154024
(98.77%)

97

Alcohol.Dependence.Ever
[factor]

1. 0
2. 1

478 (83.6%)
94 (16.4%)

572
(0.37%)

155366
(99.63%)

98

Addiction.Current
[factor]

1. 0
2. 1

617 (50.5%)
605 (49.5%)

1222
(0.78%)

154716
(99.22%)

99

Trauma.Childhood
[factor]

1. 0
2. 1

27978 (59.6%)
18965 (40.4%)

46943
(30.1%)

108995
(69.9%)

100

Trauma.Adult
[factor]

1. 0
2. 1

24536 (53.2%)
21544 (46.8%)

46080
(29.55%)

109858
(70.45%)

101

Trauma.Catastrophic
[factor]

1. 0
2. 1

27077 (56.8%)
20585 (43.2%)

47662
(30.56%)

108276
(69.44%)

102

PTSD
[factor]

1. 0
2. 1

46367 (97.5%)
1184 ( 2.5%)

47551
(30.49%)

108387
(69.51%)

103

Self.Harm.Ever
[factor]

1. 0
2. 1

46717 (98.2%)
874 ( 1.8%)

47591
(30.52%)

108347
(69.48%)

104

Not.Worth.Living
[integer]

Min : 0
Mean : 0.2
Max : 1

0 : 37962 (79.6%)
1 : 9704 (20.4%)

47666
(30.57%)

108272
(69.43%)

105

Self.Harm.Suicide.Attempt
[factor]

1. 0
2. 1

568 (66.9%)
281 (33.1%)

849
(0.54%)

155089
(99.46%)

106

WellbeingScore
[integer]

Mean (sd) : 13.1 (1.8)
min < med < max:
3 < 13 < 17
IQR (CV) : 2 (0.1)

15 distinct values

46375
(29.74%)

109563
(70.26%)

107

NoSRConditions
[factor]

1. 0
2. 1

1915 ( 4.0%)
45751 (96.0%)

47666
(30.57%)

108272
(69.43%)

108

MHQ
[factor]

1. 1

47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

109

SRAnyMDX
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

110

SRANpure
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

111

SRBNpure
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

112

SRBEDpure
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

113

Corticoids
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

114

Antidiabetics
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

115

Diuretics
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

116

Gonadotropins
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

117

Growth_Hormone
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

118

AntiretroviralMed
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

119

HRT_Contraceptives
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

120

Antiosteoporotics
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

121

Testosterone
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

122

Thyreostatics
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

123

AntitubercularAntileproticMed
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

124

Antidepressants
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

125

Antineoplastics
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

126

Antipsychotics
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

127

Anxiolytics
[factor]

1. 0
2. 1

155483 (99.7%)
455 ( 0.3%)

155938
(100%)

0
(0%)

128

MetabolicMed
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

129

PsychotropicMed
[factor]

1. 0
2. 1

155483 (99.7%)
455 ( 0.3%)

155938
(100%)

0
(0%)

130

DxCancerAny
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

131

DxBodyCompAny
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

132

CancerRegister
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

133

DxConnectiveTissue
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

134

DxDiabetes
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

135

DxEndocrine
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

136

DxGlucose
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

137

DxHIV
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

138

DxIBD
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

139

DxIBS
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

140

DxLiver
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

141

DxPsy
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

142

DxMetabolic
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

143

DxMuscles
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

144

DxPancreatitis
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

145

DxThyroid
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

146

DxTuberculosis
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

147

F500
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

148

F501
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

149

F502
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

150

F505
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

151

F508
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

152

F509
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

153

SRAnyIllnessBC
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

154

SRAnyIllnessBC.0
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

155

SRAnyIllnessBC.1
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

156

SRAnyIllnessBC.2
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

157

SRAnyPsyIllness
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

158

SRAnyNeuroIllness
[factor]

1. 0
2. 1

153316 (98.3%)
2622 ( 1.7%)

155938
(100%)

0
(0%)

159

SRAnyCancerBC
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

160

SRAnyCancer
[factor]

1. 0
2. 1

155370 (99.6%)
568 ( 0.4%)

155938
(100%)

0
(0%)

161

SRAnyCancerNeuro
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

162

SRHeartAttack
[factor]

1. 1

1406 (100.0%)

1406
(0.9%)

154532
(99.1%)

163

SRAngina
[factor]

1. 1

1701 (100.0%)

1701
(1.09%)

154237
(98.91%)

164

SRStroke
[factor]

1. 1

1011 (100.0%)

1011
(0.65%)

154927
(99.35%)

165

SRHighBloodPressure
[factor]

1. 1

26577 (100.0%)

26577
(17.04%)

129361
(82.96%)

166

ICD.autoimmune
[integer]

Min : 0
Mean : 0
Max : 1

0 : 153497 (98.4%)
1 : 2441 ( 1.6%)

155938
(100%)

0
(0%)

167

ICD.autoinflammatory
[integer]

Min : 0
Mean : 0.1
Max : 1

0 : 143545 (92.0%)
1 : 12393 ( 8.0%)

155938
(100%)

0
(0%)

168

ICD.immunodeficiency
[integer]

Min : 0
Mean : 0
Max : 1

0 : 155914 (100.0%)
1 : 24 ( 0.0%)

155938
(100%)

0
(0%)

169

ICD.memory
[integer]

Min : 0
Mean : 0
Max : 1

0 : 155693 (99.8%)
1 : 245 ( 0.2%)

155938
(100%)

0
(0%)

170

ICD.metabolic
[integer]

1 distinct value

0 : 155938 (100.0%)

155938
(100%)

0
(0%)

171

ICD.psychiatric
[integer]

1 distinct value

0 : 155938 (100.0%)

155938
(100%)

0
(0%)

172

ICD.immunodysregulation
[integer]

Min : 0
Mean : 0.1
Max : 1

0 : 142471 (91.4%)
1 : 13467 ( 8.6%)

155938
(100%)

0
(0%)

173

SRAnorexiaNervosa.numeric
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

174

F500.numeric
[integer]

1 distinct value

0 : 155938 (100.0%)

155938
(100%)

0
(0%)

175

F501.numeric
[numeric]

1 distinct value

0 : 155938 (100.0%)

155938
(100%)

0
(0%)

176

F502.numeric
[numeric]

1 distinct value

0 : 155938 (100.0%)

155938
(100%)

0
(0%)

177

F505.numeric
[numeric]

1 distinct value

0 : 155938 (100.0%)

155938
(100%)

0
(0%)

178

F508.numeric
[numeric]

1 distinct value

0 : 155938 (100.0%)

155938
(100%)

0
(0%)

179

F509.numeric
[numeric]

1 distinct value

0 : 155938 (100.0%)

155938
(100%)

0
(0%)

180

SRBulimiaNervosa.numeric
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

181

SRBingeEating.numeric
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

182

AN.count
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

183

BN.count
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

184

BED.count
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

185

EDNOS.count
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

186

PUR.count
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

187

Pica.count
[factor]

1. 0
2. 1

155938 (100.0%)
0 ( 0.0%)

155938
(100%)

0
(0%)

188

AN.count.numeric
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

189

BN.count.numeric
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

190

BED.count.numeric
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

191

EDNOS.count.numeric
[numeric]

1 distinct value

0 : 155938 (100.0%)

155938
(100%)

0
(0%)

192

PUR.count.numeric
[numeric]

1 distinct value

0 : 155938 (100.0%)

155938
(100%)

0
(0%)

193

Pica.count.numeric
[numeric]

1 distinct value

0 : 155938 (100.0%)

155938
(100%)

0
(0%)

194

AN
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

195

AN.numeric
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

196

BN
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

197

BN.numeric
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

198

BED
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

199

BED.numeric
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

200

ANpure.count
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

201

ANpure
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

202

ANpure.numeric
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

203

BNpure.count
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

204

BNpure
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

205

BNpure.numeric
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

206

BEDpure.count
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

207

BEDpure
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

208

BEDpure.numeric
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

209

ED
[factor]

1. 0
2. 1

47666 (100.0%)
0 ( 0.0%)

47666
(30.57%)

108272
(69.43%)

210

ED.control
[numeric]

1 distinct value

0 : 47666 (100.0%)

47666
(30.57%)

108272
(69.43%)

211

PC1
[numeric]

Mean (sd) : 0 (0)
min < med < max:
0 < 0 < 0.2
IQR (CV) : 0 (255.1)

155204 distinct values

155938
(100%)

0
(0%)

212

PC2
[numeric]

Mean (sd) : 0 (0)
min < med < max:
-0.1 < 0 < 0
IQR (CV) : 0 (125.5)

154615 distinct values

155938
(100%)

0
(0%)

213

PC3
[numeric]

Mean (sd) : 0 (0)
min < med < max:
-0.1 < 0 < 0.2
IQR (CV) : 0 (96.8)

155352 distinct values

155938
(100%)

0
(0%)

214

PC4
[numeric]

Mean (sd) : 0 (0)
min < med < max:
-0.1 < 0 < 0.1
IQR (CV) : 0 (54)

155402 distinct values

155938
(100%)

0
(0%)

215

PC5
[numeric]

Mean (sd) : 0 (0)
min < med < max:
0 < 0 < 0.1
IQR (CV) : 0 (-277.3)

155375 distinct values

155938
(100%)

0
(0%)

216

PC6
[numeric]

Mean (sd) : 0 (0)
min < med < max:
0 < 0 < 0
IQR (CV) : 0 (-102.4)

155296 distinct values

155938
(100%)

0
(0%)

217

PC7
[numeric]

Mean (sd) : 0 (0)
min < med < max:
-0.1 < 0 < 0
IQR (CV) : 0 (-414.4)

155341 distinct values

155938
(100%)

0
(0%)

218

PC8
[numeric]

Mean (sd) : 0 (0)
min < med < max:
-0.1 < 0 < 0
IQR (CV) : 0 (-846.7)

155366 distinct values

155938
(100%)

0
(0%)

219

PC9
[numeric]

Mean (sd) : 0 (0)
min < med < max:
-0.1 < 0 < 0.1
IQR (CV) : 0 (440.9)

155388 distinct values

155938
(100%)

0
(0%)

220

PC10
[numeric]

Mean (sd) : 0 (0)
min < med < max:
0 < 0 < 0
IQR (CV) : 0 (-666.1)

155345 distinct values

155938
(100%)

0
(0%)

221

Array
[factor]

1. UKBB
2. UKBL

140193 (89.9%)
15745 (10.1%)

155938
(100%)

0
(0%)

222

Batch
[factor]

1. Batch_b001
2. Batch_b002
3. Batch_b003
4. Batch_b004
5. Batch_b005
6. Batch_b006
7. Batch_b007
8. Batch_b008
9. Batch_b009
10. Batch_b010
[ 96 others ]

1499 ( 1.0%)
1488 ( 1.0%)
1461 ( 0.9%)
1457 ( 0.9%)
1531 ( 1.0%)
1477 ( 0.9%)
1470 ( 0.9%)
1547 ( 1.0%)
1571 ( 1.0%)
1518 ( 1.0%)
140919 (90.4%)

155938
(100%)

0
(0%)

223

Menopause_bin
[factor]

1. 0
2. 1

111489 (71.5%)
44449 (28.5%)

155938
(100%)

0
(0%)

Export data

write.table(healthy,
            file = paste("data/BCpheno_healthy",date,".txt", sep =""),
            quote = T,
            col.names = T,
            row.names = F)

write.table(healthy[,1:3],
            file = paste("data/BCpheno_healthy_IDs",date,".txt", sep =""),
            quote = T,
            col.names = T,
            row.names = F)

Eating disorder cases for eating disorder GWAS

EDcases <- GIDpheno %>%
  filter(ED == 1 # has an eating disorder: MHQ, or main ICD, or secondary ICD diagnosis
         )
EDcases$EDcontrol <- 0
dim(EDcases)
[1] 1550  224
EDcases_female <- subset(EDcases, Gender == "Female")
EDcases_female_N <- nrow(EDcases_female)

EDcases_male <- subset(EDcases, Gender == "Male")
EDcases_male_N <- nrow(EDcases_male)

EDcontrols <- GIDpheno %>%
  filter(MHQ == 1 & # has answered the mental health questionnaire (MHQ)
           !is.na(MHQ) &
           SRAnyMDX == 0 &
           PsychotropicMed == 0 & # Any self-reported psychotropic medication
           DxPsy == 0 & # Any ICD F code
           Tobacco_current != -3 & # Did not answer tobacco question
           Alcohol_frequency != -3 & # Did not answer alcohol question
           SRAnyPsyIllness == 0 # Did self-report any psychiatric disorder in the sign up questionnaire
         )
dim(EDcontrols)
[1] 80986   223
# Check if any cases are contained in the controls
length(setdiff(EDcases$IID, EDcontrols$IID))
[1] 1550
EDcontrols_female <- subset(EDcontrols, Gender == "Female")
EDcontrols_female_N <- nrow(EDcontrols_female)

EDcontrols_male <- subset(EDcontrols, Gender == "Male")
EDcontrols_male_N <- nrow(EDcontrols_male)

Sample controls in 10:1 ratio

# Case control ratio
c_c_ratio = 10
EDcontrols_female_N = EDcases_female_N * c_c_ratio
EDcontrols_female_N
[1] 14300
EDcontrols_male_N = EDcases_male_N * c_c_ratio
EDcontrols_male_N
[1] 1200
# Randomly select controls in c_c_ratio 
set.seed(23497)
EDcontrols_female_random <- dplyr::sample_n(EDcontrols_female, EDcontrols_female_N, replace = FALSE)
dim(EDcontrols_female_random)
[1] 14300   223
# Randomly select male 1:4 controls
set.seed(23497)
EDcontrols_male_random <- dplyr::sample_n(EDcontrols_male, EDcontrols_male_N, replace = FALSE)
dim(EDcontrols_male_random)
[1] 1200  223
# Rowbind female and male controls
EDcontrols_random <- rbind(EDcontrols_female_random, EDcontrols_male_random)
EDcontrols_random$EDcontrol <- 1

## Rowbind cases and controls
EDcases_controls <- rbind(EDcases, EDcontrols_random)
dim(EDcases_controls)
[1] 17050   224
summary(EDcases_controls[,EDcols])
 AN.count     BN.count     BED.count    EDNOS.count PUR.count Pica.count
 0   :16257   0   :16582   0   :16430   0:17027     0:17049   0:17050   
 1   :  768   1   :  423   1   :  561   1:   23     1:    1   1:    0   
 NA's:   25   NA's:   45   NA's:   59                                   
    AN           BN          BED         ANpure       BNpure     
 0   :15500   0   :15500   0   :15500   0   :15500   0   :15500  
 1   :  768   1   :  423   1   :  561   1   :  615   1   :  251  
 NA's:  782   NA's: 1127   NA's:  989   NA's:  935   NA's: 1299  
 BEDpure      ED        SRAnorexiaNervosa SRBulimiaNervosa SRBingeEating
 0   :15500   0:15500   0   :16259        0   :16582       0   :16430   
 1   :  468   1: 1550   1   :  732        1   :  409       1   :  561   
 NA's: 1082             NA's:   59        NA's:   59       NA's:   59   
 SREatingDisorderAny SRANpure     SRBNpure     SRBEDpure   
 0   :15503          0   :16265   0   :16588   0   :16436  
 1   : 1488          1   :  726   1   :  403   1   :  555  
 NA's:   59          NA's:   59   NA's:   59   NA's:   59  
# Write output and IID lists
write.table(EDcases, file = paste("EDcases/EDcases",date,".txt", sep =""),
            quote = T, col.names = T, row.names = F)
write.table(EDcases_female, file = paste("EDcases/EDcases_female",date,".txt", sep =""),
            quote = T, col.names = T, row.names = F)
write.table(EDcases_male, file = paste("EDcases/EDcases_male",date,".txt", sep =""),
            quote = T, col.names = T, row.names = F)
write.table(EDcases$IID, file = paste("EDcases/EDcases_ids",date,".txt", sep =""),
            quote = F, col.names = F, row.names = F)
write.table(EDcases_female$IID, file = paste("EDcases/EDcases_female_ids",date,".txt", sep =""),
            quote = F, col.names = F, row.names = F)
write.table(EDcases_male$IID, file = paste("EDcases/EDcases_male_ids",date,".txt", sep =""),
            quote = F, col.names = F, row.names = F)
write.table(EDcontrols_random, file = paste("EDcases/EDcontrols",date,".txt", sep =""),
            quote = T, col.names = T, row.names = F)
write.table(EDcontrols_female_random, file = paste("EDcases/EDcontrols_female",date,".txt", sep =""),
            quote = T, col.names = T, row.names = F)
write.table(EDcontrols_male_random, file = paste("EDcases/EDcontrols_male",date,".txt", sep =""),
            quote = T, col.names = T, row.names = F)
write.table(EDcontrols_random$IID, file = paste("EDcases/EDcontrols_ids",date,".txt", sep =""),
            quote = F, col.names = F, row.names = F)
write.table(EDcontrols_female_random$IID, file = paste("EDcases/EDcontrols_female_ids",date,".txt", sep =""),
            quote = F, col.names = F, row.names = F)
write.table(EDcontrols_male_random$IID, file = paste("EDcases/EDcontrols_male_ids",date,".txt", sep =""),
            quote = F, col.names = F, row.names = F)
write.table(EDcases_controls, file = paste("EDcases/EDcases_controls",date,".txt", sep =""),
            quote = T, col.names = T, row.names = F)
write.table(EDcases_controls$IID, file = paste("EDcases/EDcases_controls_ids",date,".txt", sep =""),
            quote = F, col.names = F, row.names = F)

Eating disorders crosstables for MPRS

ctable(x = EDcases_controls$AN, y = EDcases_controls$BN)
Cross-Tabulation, Row Proportions  
AN * BN  
Data Frame: EDcases_controls  

------- ---- ---------------- ------------- -------------- ----------------
          BN                0             1           <NA>            Total
     AN                                                                    
      0        15500 (100.0%)     0 ( 0.0%)      0 ( 0.0%)   15500 (100.0%)
      1            0 (  0.0%)   126 (16.4%)    642 (83.6%)     768 (100.0%)
   <NA>            0 (  0.0%)   297 (38.0%)    485 (62.0%)     782 (100.0%)
  Total        15500 ( 90.9%)   423 ( 2.5%)   1127 ( 6.6%)   17050 (100.0%)
------- ---- ---------------- ------------- -------------- ----------------
ctable(x = EDcases_controls$AN, y = EDcases_controls$BED)
Cross-Tabulation, Row Proportions  
AN * BED  
Data Frame: EDcases_controls  

------- ----- ---------------- ------------- ------------- ----------------
          BED                0             1          <NA>            Total
     AN                                                                    
      0         15500 (100.0%)     0 ( 0.0%)     0 ( 0.0%)   15500 (100.0%)
      1             0 (  0.0%)    46 ( 6.0%)   722 (94.0%)     768 (100.0%)
   <NA>             0 (  0.0%)   515 (65.9%)   267 (34.1%)     782 (100.0%)
  Total         15500 ( 90.9%)   561 ( 3.3%)   989 ( 5.8%)   17050 (100.0%)
------- ----- ---------------- ------------- ------------- ----------------
ctable(x = EDcases_controls$BN, y = EDcases_controls$BED)
Cross-Tabulation, Row Proportions  
BN * BED  
Data Frame: EDcases_controls  

------- ----- ---------------- ------------- ------------- ----------------
          BED                0             1          <NA>            Total
     BN                                                                    
      0         15500 (100.0%)     0 ( 0.0%)     0 ( 0.0%)   15500 (100.0%)
      1             0 (  0.0%)    68 (16.1%)   355 (83.9%)     423 (100.0%)
   <NA>             0 (  0.0%)   493 (43.7%)   634 (56.3%)    1127 (100.0%)
  Total         15500 ( 90.9%)   561 ( 3.3%)   989 ( 5.8%)   17050 (100.0%)
------- ----- ---------------- ------------- ------------- ----------------

Phenotype file for multi-polygenic risk scoring

EDcases_red <- EDcases_controls %>%
  select(IID,
         Age, BMI, Gender, SES, Ethnicity, BMD_total, BFPC,
         PC1, PC2, PC3, PC4, PC5, PC6, PC7, PC8, PC9, PC10,
         AN, BN, BED, ED, EDcontrol, MHQ,
         ANpure, BNpure, BEDpure,
         PsychotropicMed, Antidepressants, Anxiolytics, Antipsychotics,
         DxCancerAny)

MPRSUKB <- EDcases_controls %>%
  select(IID,
         AN, BN, BED, ED,
         Gender,
         PC1, PC2, PC3, PC4, PC5, PC6
)

MPRSUKB$FID <- MPRSUKB$IID

# Reorder
MPRSUKB <- MPRSUKB %>% select(FID, IID, everything())
dim(MPRSUKB)
[1] 17050    13
# Recode gender
MPRSUKB$Gender <- factor(MPRSUKB$Gender, levels = c("Female", "Male"), labels = c(0, 1))

write.table(MPRSUKB,
            paste("data/EDcases_controls_mprs",date,".txt", sep =""),
            quote = F,
            row.names = F,
            col.names = T,
            sep = "\t")

Descriptives: MPRS

summarytools::dfSummary(MPRSUKB[,-1],
                        bootstrap.css     = FALSE,       # Already part of the theme so no need for it
                        plain.ascii       = FALSE,       # One of the essential settings
                        style             = "grid", # Idem.
                        dfSummary.silent  = TRUE,        # Suppresses messages about temporary files
                        footnote          = NA,          # Keeping the results minimalistic
                        subtitle.emphasis = FALSE,
                        graph.magnif = 0.75,
                        tmp.img.dir = "/tmp")

Data Frame Summary

MPRSUKB

Dimensions: 17050 x 12
Duplicates: 0

No Variable Stats / Values Freqs (% of Valid) Graph Valid Missing

1

IID
[integer]

Mean (sd) : 3515252.3 (1447368.2)
min < med < max:
1000088 < 3512055.5 < 6026139
IQR (CV) : 2500860.5 (0.4)

17050 distinct values

17050
(100%)

0
(0%)

2

AN
[factor]

1. 0
2. 1

15500 (95.3%)
768 ( 4.7%)

16268
(95.41%)

782
(4.59%)

3

BN
[factor]

1. 0
2. 1

15500 (97.3%)
423 ( 2.7%)

15923
(93.39%)

1127
(6.61%)

4

BED
[factor]

1. 0
2. 1

15500 (96.5%)
561 ( 3.5%)

16061
(94.2%)

989
(5.8%)

5

ED
[factor]

1. 0
2. 1

15500 (90.9%)
1550 ( 9.1%)

17050
(100%)

0
(0%)

6

Gender
[factor]

1. 0
2. 1

15730 (92.3%)
1320 ( 7.7%)

17050
(100%)

0
(0%)

7

PC1
[numeric]

Mean (sd) : 0 (0)
min < med < max:
0 < 0 < 0.2
IQR (CV) : 0 (20)

17042 distinct values

17050
(100%)

0
(0%)

8

PC2
[numeric]

Mean (sd) : 0 (0)
min < med < max:
-0.1 < 0 < 0
IQR (CV) : 0 (22.8)

17030 distinct values

17050
(100%)

0
(0%)

9

PC3
[numeric]

Mean (sd) : 0 (0)
min < med < max:
0 < 0 < 0.1
IQR (CV) : 0 (39.7)

17039 distinct values

17050
(100%)

0
(0%)

10

PC4
[numeric]

Mean (sd) : 0 (0)
min < med < max:
-0.1 < 0 < 0.1
IQR (CV) : 0 (314.5)

17043 distinct values

17050
(100%)

0
(0%)

11

PC5
[numeric]

Mean (sd) : 0 (0)
min < med < max:
0 < 0 < 0.1
IQR (CV) : 0 (49)

17042 distinct values

17050
(100%)

0
(0%)

12

PC6
[numeric]

Mean (sd) : 0 (0)
min < med < max:
0 < 0 < 0.1
IQR (CV) : 0 (-12.6)

17040 distinct values

17050
(100%)

0
(0%)

Extract data for diet questionnaire & inflammation

autoimmune.diet.variables <- c(
"IID",
"Gender",
"Age",
"Height",
"Weight",
"BMI",
"WC",
"HC",
"BFPC",
"FM",
"FFM",
"SES",
"Highest.Qualification",
"Ethnicity",
"Centre",
"WHR",
"Pregnancy_no_NA",
"Menopause_new",
"Alcohol_frequency",
"Tobacco_current",
"Smoker",
"WHRadjBMI",
"SRANpure",
"SRBNpure",
"SRBEDpure",
"MetabolicMed",
"Corticoids",
"Antidiabetics",
"Diuretics",
"Gonadotropins",
"Growth_Hormone",
"AntiretroviralMed",
"HRT_Contraceptives",
"Antiosteoporotics",
"Testosterone",
"Thyreostatics",
"AntitubercularAntileproticMed",
"Antidepressants",
"Antineoplastics",
"Antipsychotics",
"PsychotropicMed",
"Corticoids",
"Testosterone",
"Antidepressants",
"Anxiolytics",
"Antipsychotics",
"DxCancerAny",
"DxBodyCompAny",
"DxConnectiveTissue",
"DxDiabetes",
"DxEndocrine",
"DxGlucose",
"DxHIV",
"DxIBD",
"DxIBS",
"DxLiver",
"DxPsy",
"DxMetabolic",
"DxMuscles",
"DxPancreatitis",
"DxThyroid",
"DxTuberculosis",
"Trauma.Childhood",
"Trauma.Adult",
"Trauma.Catastrophic",
"PTSD",
"PHQ9.Severity",
"ICD.autoimmune",
"ICD.autoinflammatory",
"ICD.immunodeficiency",
"ICD.memory",
"ICD.metabolic",
"ICD.psychiatric",
"ICD.immunodysregulation",
"SRAnyIllnessBC",
"SRAnyPsyIllness",
"SRAnyNeuroIllness",
"SRAnyCancerNeuro",
"SRStroke",
"AN",
"BN",
"BED",
"ANpure",
"BNpure",
"BEDpure",
"PC1",
"PC2",
"PC3",
"PC4",
"PC5",
"PC6",
"EuropeanGenetic"
)

data.autoimmune.diet <- GID_pheno_all_genetic %>%
  select(autoimmune.diet.variables)

colnames(data.autoimmune.diet)
 [1] "IID"                           "Gender"                       
 [3] "Age"                           "Height"                       
 [5] "Weight"                        "BMI"                          
 [7] "WC"                            "HC"                           
 [9] "BFPC"                          "FM"                           
[11] "FFM"                           "SES"                          
[13] "Highest.Qualification"         "Ethnicity"                    
[15] "Centre"                        "WHR"                          
[17] "Pregnancy_no_NA"               "Menopause_new"                
[19] "Alcohol_frequency"             "Tobacco_current"              
[21] "Smoker"                        "WHRadjBMI"                    
[23] "SRANpure"                      "SRBNpure"                     
[25] "SRBEDpure"                     "MetabolicMed"                 
[27] "Corticoids"                    "Antidiabetics"                
[29] "Diuretics"                     "Gonadotropins"                
[31] "Growth_Hormone"                "AntiretroviralMed"            
[33] "HRT_Contraceptives"            "Antiosteoporotics"            
[35] "Testosterone"                  "Thyreostatics"                
[37] "AntitubercularAntileproticMed" "Antidepressants"              
[39] "Antineoplastics"               "Antipsychotics"               
[41] "PsychotropicMed"               "Anxiolytics"                  
[43] "DxCancerAny"                   "DxBodyCompAny"                
[45] "DxConnectiveTissue"            "DxDiabetes"                   
[47] "DxEndocrine"                   "DxGlucose"                    
[49] "DxHIV"                         "DxIBD"                        
[51] "DxIBS"                         "DxLiver"                      
[53] "DxPsy"                         "DxMetabolic"                  
[55] "DxMuscles"                     "DxPancreatitis"               
[57] "DxThyroid"                     "DxTuberculosis"               
[59] "Trauma.Childhood"              "Trauma.Adult"                 
[61] "Trauma.Catastrophic"           "PTSD"                         
[63] "PHQ9.Severity"                 "ICD.autoimmune"               
[65] "ICD.autoinflammatory"          "ICD.immunodeficiency"         
[67] "ICD.memory"                    "ICD.metabolic"                
[69] "ICD.psychiatric"               "ICD.immunodysregulation"      
[71] "SRAnyIllnessBC"                "SRAnyPsyIllness"              
[73] "SRAnyNeuroIllness"             "SRAnyCancerNeuro"             
[75] "SRStroke"                      "AN"                           
[77] "BN"                            "BED"                          
[79] "ANpure"                        "BNpure"                       
[81] "BEDpure"                       "PC1"                          
[83] "PC2"                           "PC3"                          
[85] "PC4"                           "PC5"                          
[87] "PC6"                           "EuropeanGenetic"              
save(object = data.autoimmune.diet, file = paste0("data/data.autoimmune.diet",date,".Rdata"))