Import the phenotypes that were extracted from the large UKB file (awk) ($PHENO/Full_Phenotype_Data_250917_With_MHQ_TABBED.txt)
# Read the extracted phenotype table as a plain data.frame (not a data.table)
# and report its dimensions.
UKBpheno <- fread(
  file = "data_raw/2019_02_29_BC/EDcases_pheno.txt",
  header = TRUE,
  data.table = FALSE
)
dim(UKBpheno)
[1] 502618 17
# Label the Gender codes (0 = Female, 1 = Male) and make Centre categorical.
UKBpheno[["Gender"]] <- factor(
  UKBpheno[["Gender"]],
  levels = c(0, 1),
  labels = c("Female", "Male")
)
UKBfactor <- "Centre"
UKBpheno[UKBfactor] <- lapply(UKBpheno[UKBfactor], factor)
# Names of the numeric columns.
# Pregnancy and Menopause stay numeric here because they are recoded below.
UKBnumeric <- c(
  "IID", "Age", "Height", "Weight",
  "BMI", "WC", "HC",
  "BFPC", "FM", "FFM",
  "SES", "Pregnancy", "Menopause"
)
# Waist-to-hip ratio (WHR) = waist circumference / hip circumference
UKBpheno[["WHR"]] <- with(UKBpheno, WC / HC)
# Inspect the resulting structure
str(UKBpheno)
'data.frame': 502618 obs. of 18 variables:
$ IID : int 1000015 1000027 1000039 1000040 1000053 1000064 1000071 1000088 1000096 1000109 ...
$ Gender : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 2 1 1 2 1 ...
$ Age : int 57 49 48 64 43 56 46 54 40 66 ...
$ Height : num 157 177 149 172 174 ...
$ Weight : num 75 77.3 54.3 78.3 94.6 93.3 60.6 61.5 105 78.5 ...
$ BMI : num 30.4 24.7 24.5 26.5 31.2 29.8 25.2 23.4 31.7 29.5 ...
$ WC : num 92 91 76 90 104 96 79 71 104 94 ...
$ HC : num 109 101 95 102 108 ...
$ BFPC : num 42.6 40.6 30.3 20.8 31.8 25 35.6 28.5 27.6 42.9 ...
$ FM : num 31.9 31.4 16.5 16.3 30.1 23.3 21.6 17.5 29 33.7 ...
$ FFM : num 43.1 46 37.9 62 64.5 70 39 44 76 44.8 ...
$ SES : num -4.057 -3.1261 -1.6993 -4.6224 -0.0044 ...
$ Tobacco_current_orig : int 0 1 0 0 0 0 0 0 0 0 ...
$ Alcohol_frequency_orig: int 3 3 5 1 2 3 3 2 3 1 ...
$ Pregnancy : int 0 0 0 NA NA NA 0 0 NA 0 ...
$ Menopause : int 1 0 0 NA NA NA 0 3 NA 1 ...
$ Centre : Factor w/ 22 levels "10003","11001",..: 12 19 21 12 5 20 7 12 1 12 ...
$ WHR : num 0.844 0.901 0.8 0.882 0.963 ...
Gender: Female == 0, Male == 1
Menopause: Split by females and males
# Recode Menopause into a single factor that covers both sexes:
#   0 = male with missing answer     1 = female with missing answer
#   2 = female, yes                  3 = female, hysterectomy
#   4 = female, not sure             5 = female, prefer not to answer (-3)
#   6 = female, no
# Rows matching none of these rules (e.g. missing Gender) stay NA.
# which() drops NA comparisons, so only definite matches are assigned.
UKBpheno$Menopause_new <- with(UKBpheno, {
  recoded <- rep(NA_real_, length(Gender))
  recoded[which(Gender == "Male" & is.na(Menopause))] <- 0
  recoded[which(Gender == "Female" & is.na(Menopause))] <- 1
  recoded[which(Gender == "Female" & Menopause == 1)] <- 2
  recoded[which(Gender == "Female" & Menopause == 2)] <- 3
  recoded[which(Gender == "Female" & Menopause == 3)] <- 4
  recoded[which(Gender == "Female" & Menopause == -3)] <- 5
  recoded[which(Gender == "Female" & Menopause == 0)] <- 6
  as.factor(recoded)
})
# Drop the original Menopause column
UKBpheno$Menopause <- NULL
summary(UKBpheno$Menopause_new)
0 1 2 3 4 5 6 NA's
229131 475 165411 31171 11732 535 64081 82
# OLD
# 0 1 2 3 4 5 6 NA's
# 177552 128 127330 23826 8896 183 47825 2
Pregnancy
# Recode Pregnancy into a single factor that covers both sexes:
#   0 = male with missing answer     1 = female with missing answer
#   2 = female, no                   3 = female, yes
#   4 = female, not sure
# Rows matching none of these rules (e.g. missing Gender) stay NA.
# which() drops NA comparisons, so only definite matches are assigned.
UKBpheno$Pregnancy_no_NA <- with(UKBpheno, {
  recoded <- rep(NA_real_, length(Gender))
  recoded[which(Gender == "Male" & is.na(Pregnancy))] <- 0
  recoded[which(Gender == "Female" & is.na(Pregnancy))] <- 1
  recoded[which(Gender == "Female" & Pregnancy == 0)] <- 2
  recoded[which(Gender == "Female" & Pregnancy == 1)] <- 3
  recoded[which(Gender == "Female" & Pregnancy == 2)] <- 4
  as.factor(recoded)
})
# Delete the old Pregnancy column
UKBpheno$Pregnancy <- NULL
summary(UKBpheno$Pregnancy_no_NA)
0 1 2 3 4 NA's
229131 842 272191 150 222 82
# OLD
# 0 1 2 3 4 NA's
# 177552 209 207720 105 154 2
Recode alcohol frequency as ordered factor
# Build an ordered factor from the original alcohol-frequency codes.
# "Prefer not to answer" (-3) becomes NA; any code not listed in `levels`
# also ends up NA because factor() only keeps the listed levels.
UKBpheno$Alcohol_frequency <- with(UKBpheno, {
  answer <- Alcohol_frequency_orig
  answer[which(answer == "-3")] <- NA
  factor(
    answer,
    levels = c("6", "5", "4", "3", "2", "1"),
    labels = c(
      "Never", "Special occasions only", "One to three times a month",
      "Once or twice a week", "Three or four times a week",
      "Daily or almost daily"
    ),
    ordered = TRUE
  )
})
summary(UKBpheno$Alcohol_frequency)
Never Special occasions only
40649 58013
One to three times a month Once or twice a week
55860 129298
Three or four times a week Daily or almost daily
115446 101775
NA's
1577
# Horizontal bar chart of the number of participants per alcohol-frequency
# category, excluding rows with a missing category.
# geom_bar() counts rows per category; the previous
# geom_histogram(stat = "count") drew the same bars but emitted
# "Ignoring unknown parameters: binwidth, bins, pad".
ggplot(
  data = subset(UKBpheno, !is.na(Alcohol_frequency)),
  aes(x = Alcohol_frequency)
) +
  geom_bar() +
  labs(
    y = "Frequency",
    title = "Alcohol intake frequency (self-report)"
  ) +
  theme(
    panel.grid.major.x = element_line(
      size = 0.5,
      linetype = "dashed",
      colour = "gray"
    ),
    axis.title.y = element_blank(),
    axis.text.x = element_text(colour = "black", size = 12),
    axis.text.y = element_text(colour = "black", size = 12),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    panel.background = element_blank()
  ) +
  # Thousands separators on the count axis
  scale_y_continuous(labels = scales::comma) +
  coord_flip()
Warning: Ignoring unknown parameters: binwidth, bins, pad
Recode current tobacco smoking as ordered factor
# Build an ordered factor from the original current-smoking codes.
# "Prefer not to answer" (-3) becomes NA; any code not listed in `levels`
# also ends up NA because factor() only keeps the listed levels.
UKBpheno$Tobacco_current <- with(UKBpheno, {
  answer <- Tobacco_current_orig
  answer[which(answer == "-3")] <- NA
  factor(
    answer,
    levels = c("0", "1", "2"),
    labels = c("No", "Only occasionally", "Yes, on most or all days"),
    ordered = TRUE
  )
})
summary(UKBpheno$Tobacco_current)
No Only occasionally Yes, on most or all days
448244 39244 13735
NA's
1395
# Horizontal bar chart of the number of participants per current-smoking
# category, excluding rows with a missing category.
# geom_bar() counts rows per category; the previous
# geom_histogram(stat = "count") drew the same bars but emitted
# "Ignoring unknown parameters: binwidth, bins, pad".
ggplot(
  data = subset(UKBpheno, !is.na(Tobacco_current)),
  aes(x = Tobacco_current)
) +
  geom_bar() +
  labs(
    y = "Frequency",
    title = "Current tobacco use (self-report)"
  ) +
  theme(
    panel.grid.major.x = element_line(
      size = 0.5,
      linetype = "dashed",
      colour = "gray"
    ),
    axis.title.y = element_blank(),
    axis.text.x = element_text(colour = "black", size = 12),
    axis.text.y = element_text(colour = "black", size = 12),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    panel.background = element_blank()
  ) +
  # Thousands separators on the count axis
  scale_y_continuous(labels = scales::comma) +
  coord_flip()
Warning: Ignoring unknown parameters: binwidth, bins, pad
Check number of NAs per column
# Number of missing values in each column of UKBpheno
colSums(is.na(UKBpheno))
IID Gender Age
0 75 75
Height Weight BMI
2614 10204 10212
WC HC BFPC
2235 2294 10484
FM FFM SES
11050 10252 698
Tobacco_current_orig Alcohol_frequency_orig Centre
966 972 75
WHR Menopause_new Pregnancy_no_NA
2340 82 82
Alcohol_frequency Tobacco_current
1577 1395
Define columns for complete cases
# Column names intended for the complete-case definition
cols_cc <- c(
  "Gender", "Age",
  "Height", "Weight", "BMI", "WC", "HC",
  "BFPC", "FM", "FFM",
  "SES",
  "Tobacco_current_orig", "Alcohol_frequency_orig",
  "Centre", "WHR",
  "Menopause_new", "Pregnancy_no_NA"
)
cols_cc
[1] "Gender" "Age"
[3] "Height" "Weight"
[5] "BMI" "WC"
[7] "HC" "BFPC"
[9] "FM" "FFM"
[11] "SES" "Tobacco_current_orig"
[13] "Alcohol_frequency_orig" "Centre"
[15] "WHR" "Menopause_new"
[17] "Pregnancy_no_NA"
Calculate measures adjusted for BMI and Height
# WHR adjusted for BMI: residuals from regressing WHR on BMI.
# na.exclude pads rows with missing inputs with NA, so the residual vector
# has one entry per row of UKBpheno.
UKBpheno$WHRadjBMI <- residuals(
  lm(WHR ~ BMI, data = UKBpheno, na.action = na.exclude)
)
summary(UKBpheno$WHRadjBMI)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-0.624 -0.060 0.003 0.000 0.060 1.290 10318
# WC adjusted for BMI: residuals from regressing WC on BMI.
# na.exclude pads rows with missing inputs with NA, so the residual vector
# has one entry per row of UKBpheno.
UKBpheno$WCadjBMI <- residuals(
  lm(WC ~ BMI, data = UKBpheno, na.action = na.exclude)
)
summary(UKBpheno$WCadjBMI)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-56.923 -5.723 0.049 0.000 5.763 73.307 10289
# HC adjusted for BMI: residuals from regressing HC on BMI.
# na.exclude pads rows with missing inputs with NA, so the residual vector
# has one entry per row of UKBpheno.
UKBpheno$HCadjBMI <- residuals(
  lm(HC ~ BMI, data = UKBpheno, na.action = na.exclude)
)
summary(UKBpheno$HCadjBMI)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-57.685 -2.972 0.001 0.000 2.974 77.647 10282
# WHR adjusted for BFPC: residuals from regressing WHR on BFPC.
# na.exclude pads rows with missing inputs with NA, so the residual vector
# has one entry per row of UKBpheno.
UKBpheno$WHRadjBFPC <- residuals(
  lm(WHR ~ BFPC, data = UKBpheno, na.action = na.exclude)
)
summary(UKBpheno$WHRadjBFPC)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-0.695 -0.065 0.001 0.000 0.062 1.241 10590
# WC adjusted for BFPC: residuals from regressing WC on BFPC.
# na.exclude pads rows with missing inputs with NA, so the residual vector
# has one entry per row of UKBpheno.
UKBpheno$WCadjBFPC <- residuals(
  lm(WC ~ BFPC, data = UKBpheno, na.action = na.exclude)
)
summary(UKBpheno$WCadjBFPC)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-63.829 -10.388 -0.043 0.000 8.985 105.660 10561
# HC adjusted for BFPC: residuals from regressing HC on BFPC.
# na.exclude pads rows with missing inputs with NA, so the residual vector
# has one entry per row of UKBpheno.
UKBpheno$HCadjBFPC <- residuals(
  lm(HC ~ BFPC, data = UKBpheno, na.action = na.exclude)
)
summary(UKBpheno$HCadjBFPC)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-57.614 -5.411 -0.329 0.000 4.732 81.121 10554
# FFM adjusted for Height: residuals from regressing FFM on Height.
# na.exclude pads rows with missing inputs with NA, so the residual vector
# has one entry per row of UKBpheno.
UKBpheno$FFMadjHeight <- residuals(
  lm(FFM ~ Height, data = UKBpheno, na.action = na.exclude)
)
summary(UKBpheno$FFMadjHeight)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-38.394 -4.656 -0.350 0.000 4.255 38.836 10505
# BFPC adjusted for Height: residuals from regressing BFPC on Height.
# na.exclude pads rows with missing inputs with NA, so the residual vector
# has one entry per row of UKBpheno.
UKBpheno$BFPCadjHeight <- residuals(
  lm(BFPC ~ Height, data = UKBpheno, na.action = na.exclude)
)
summary(UKBpheno$BFPCadjHeight)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-35.614 -4.998 -0.120 0.000 4.958 36.276 10734
Import additional phenotype data from UKB that has several NAs
# Read the additional phenotype table as a plain data.frame.
UKBpheno_add <- fread(
  file = "data_raw/2019_02_29_BC/EDcases_pheno_additional.txt",
  header = TRUE,
  data.table = FALSE
)
# Columns to convert to factors
UKB_addfactor <- c(
  "Region", "Income",
  "Contraceptive_ever", "HRT", "Breastfed",
  "BipolarInitialQ"
)
UKBpheno_add[UKB_addfactor] <- lapply(
  UKBpheno_add[UKB_addfactor],
  function(column) factor(column)
)
# Names of the numeric columns
UKB_add_numeric <- c(
  "IID", "Impedance_wb",
  "FM_trunk", "FFM_trunk", "BFPC_trunk",
  "Birth_weight", "VAT",
  "BMC_wb", "BMD_total",
  "Menarche_age_at"
)
Ethnicity
# Collapse the numeric ethnicity codes into six groups.
# Each of the first four groups accepts its single-digit top-level code
# (1-4) as well as the four-digit sub-codes in the matching thousand range.
# Previously only the sub-code ranges were tested for these groups, so rows
# holding a bare top-level code 1-4 were silently set to NA, while top-level
# codes 5 and 6 were kept.
# Negative codes, missing values and any other code stay NA.
# which() drops NA comparisons, so only definite matches are assigned.
UKBpheno_add$Ethnicity <- with(UKBpheno_add, {
  grouped <- rep(NA_character_, length(Ethnicity))
  grouped[which(Ethnicity == 1 | (Ethnicity > 1000 & Ethnicity < 1999))] <-
    "European"
  grouped[which(Ethnicity == 2 | (Ethnicity > 2000 & Ethnicity < 2999))] <-
    "Mixed"
  grouped[which(Ethnicity == 3 | (Ethnicity > 3000 & Ethnicity < 3999))] <-
    "Asian"
  grouped[which(Ethnicity == 4 | (Ethnicity > 4000 & Ethnicity < 4999))] <-
    "African"
  grouped[which(Ethnicity == 5)] <- "Chinese"
  grouped[which(Ethnicity == 6)] <- "Other"
  factor(
    grouped,
    levels = c("European", "African", "Asian", "Chinese", "Mixed", "Other")
  )
})
summary(UKBpheno_add$Ethnicity)
European African Asian Chinese Mixed Other NA's
472161 8035 9839 1574 2909 4559 3541
# Inspect column types of the additional phenotype table after recoding
str(UKBpheno_add)
'data.frame': 502618 obs. of 17 variables:
$ IID : int 1000015 1000027 1000039 1000040 1000053 1000064 1000071 1000088 1000096 1000109 ...
$ BFPC_trunk : num 40.3 40.8 25 21.7 33.9 26.8 32.2 23.9 29.7 41.1 ...
$ FM_trunk : num 16.3 17.9 7.2 9.8 17.7 14.2 10.6 8 17.4 17.7 ...
$ FFM_trunk : num 24.1 25.9 21.6 35.4 34.5 38.8 22.2 25.5 41.1 25.3 ...
$ Impedance_wb : int 653 761 671 491 569 477 720 623 499 653 ...
$ Ethnicity : Factor w/ 6 levels "European","African",..: 1 2 1 1 1 1 1 1 1 1 ...
$ Region : Factor w/ 16 levels "1","2","3","4",..: 5 5 5 6 10 5 5 6 5 5 ...
$ Income : Factor w/ 7 levels "-3","-1","1",..: 4 5 4 5 5 5 1 5 NA 4 ...
$ Birth_weight : num 2.83 NA 3.18 NA 3.37 2.95 NA NA 3.12 2.72 ...
$ Menarche_age_at : int 13 14 12 NA NA NA 16 13 NA -1 ...
$ Contraceptive_ever: Factor w/ 4 levels "-3","-1","0",..: 4 4 4 NA NA NA 4 4 NA 3 ...
$ HRT : Factor w/ 4 levels "-3","-1","0",..: 4 3 3 NA NA NA 3 4 NA 3 ...
$ Breastfed : Factor w/ 4 levels "-3","-1","0",..: 2 2 4 4 3 4 3 2 4 3 ...
$ VAT : int NA NA NA NA NA NA NA NA NA NA ...
$ BMC_wb : int NA NA NA NA NA NA NA NA NA NA ...
$ BMD_total : num NA NA NA NA NA NA NA NA NA NA ...
$ BipolarInitialQ : Factor w/ 2 levels "1","2": NA NA NA NA NA NA NA NA NA NA ...
# Number of missing values in each column of UKBpheno_add
colSums(is.na(UKBpheno_add))
IID BFPC_trunk FM_trunk
0 10506 10533
FFM_trunk Impedance_wb Ethnicity
10617 10263 3541
Region Income Birth_weight
5126 6092 225606
Menarche_age_at Contraceptive_ever HRT
229681 229684 229684
Breastfed VAT BMC_wb
973 497509 497448
BMD_total BipolarInitialQ
497448 501003
Merge UKBpheno data and additional UKB data
# Row count before merging, for comparison with the merged table
nrow(UKBpheno)
[1] 502618
# Left join: keep every row of UKBpheno and attach the additional phenotypes.
# The key is named explicitly; without `by`, merge() joins on every column
# name the two tables share, so a column added to both later on would
# silently become part of the key.
UKBpheno_merged <- merge(
  UKBpheno,
  UKBpheno_add,
  by = "IID",
  all.x = TRUE,
  sort = FALSE
)
dim(UKBpheno_merged)
[1] 502618 44
Import Mental Health Questionnaire (MHQ) data
# Read the Mental Health Questionnaire table as a plain data.frame and
# report its dimensions.
MHQ <- fread(
  file = "data_raw/2019_02_29_BC/EDcases_MHQ.txt",
  header = TRUE,
  data.table = FALSE
)
dim(MHQ)
[1] 157358 63
# Rename the ID column to IID
colnames(MHQ)[colnames(MHQ) == "f.eid"] <- "IID"
# Binary marker column: 1 for every row of this table (MHQ answered)
MHQ$MHQ <- 1
# Self-report columns that feed the "any" indicator below
# NOTE(review): SRPNTA is in this list; presumably "prefer not to answer" -
# confirm that it should count towards SRAnyMDX.
SRmdxcols <- c(
  "SRSchizophrenia", "SRPsychosisOther", "SRDepression",
  "SRManiaBIP", "SRGADandOthers", "SRPanicAttacks",
  "SRAgoraphobia", "SRSocPhobia", "SROtherPhobia",
  "SROCD", "SRPersonalityDisorder", "SRAnorexiaNervosa",
  "SRBulimiaNervosa", "SRBingeEating", "SRASD", "SRADHD",
  "SRPNTA"
)
# TRUE when at least one of the SRmdxcols columns equals 1 in that row
MHQ$SRAnyMDX <- apply(MHQ[, SRmdxcols] == 1, 1, any)
# Recode as a 0/1 factor. The levels are given explicitly so the call still
# works when only TRUE or only FALSE occurs; with labels alone, factor()
# errors on a length mismatch in that case.
MHQ$SRAnyMDX <- factor(
  MHQ$SRAnyMDX,
  levels = c(FALSE, TRUE),
  labels = c(0, 1)
)
summary(MHQ$SRAnyMDX)
0 1
107259 50099
# MHQ columns to convert to factors
UKBfactor <- c(
  "Migrant.Status", "Highest.Qualification",
  "Smoker",
  "Longstanding.Illness", "Diabetes", "Cancer", "CVD", "Respiratory",
  "SRSocPhobia", "SRSchizophrenia", "SRPsychosisOther", "SRPsychosisAny",
  "SRPersonalityDisorder", "SROtherPhobia",
  "SRPanicAttacks", "SROCD", "SRManiaBIP", "SRDepression", "SRMood",
  "SRBulimiaNervosa", "SRBingeEating", "SRASD", "SRGADandOthers",
  "SRAnorexiaNervosa",
  "SREatingDisorderAny", "SRAgoraphobia", "SRAnxietyAny", "SRADHD",
  "SmithDepression",
  "PHQ9.No.Info", "PHQ9.Screen", "PHQ9.Items",
  "GAD.Ever", "GAD.Current",
  "Depressed.Ever", "Depressed.Ever.Severe", "Recurrent.Depression",
  "Single.Depression",
  "SmithBipolar",
  "Cannabis.Ever", "Cannabis.Daily", "Addiction.Ever.SelfReport",
  "Addiction.Ever",
  "Substance.Addiction.Ever", "Alcohol.Dependence.Ever",
  "Addiction.Current",
  "Alcohol.Use.Disorder",
  "Trauma.Childhood", "Trauma.Adult", "Trauma.Catastrophic", "PTSD",
  "Self.Harm.Ever", "Self.Harm.Suicide.Attempt", "NoSRConditions", "MHQ"
)
MHQ[UKBfactor] <- lapply(MHQ[UKBfactor], function(column) factor(column))
# Names of the numeric MHQ columns
UKBnumeric <- c(
  "IID", "Age.At.MHQ", "Neuroticism", "AUDIT.Score", "WellbeingScore",
  "PHQ9.Severity", "GAD7.Severity"
)
Code variables for anorexia nervosa (AN), bulimia nervosa (BN), or binge-eating disorder (BED) without comorbidity
# SRANpure (no comorbidity): 1 when SRAnorexiaNervosa == 1 AND every other
# listed self-reported disorder == 0; otherwise 0.
# The previous version joined the "== 0" tests with "|", which flagged a
# case as pure as soon as any single other disorder was absent.
# A missing value in any tested column makes the row not pure (0).
MHQ$SRANpure <- local({
  other_dx <- c(
    "SRBulimiaNervosa", "SRSocPhobia", "SRSchizophrenia",
    "SRPsychosisOther", "SRPersonalityDisorder", "SROtherPhobia",
    "SRPanicAttacks", "SROCD", "SRManiaBIP", "SRDepression",
    "SRBingeEating", "SRASD", "SRGADandOthers", "SRAgoraphobia",
    "SRADHD"
  )
  none_other <- Reduce(`&`, lapply(MHQ[other_dx], function(dx) dx == 0))
  pure <- MHQ$SRAnorexiaNervosa == 1 & none_other
  # %in% TRUE maps NA to FALSE
  as.factor(as.numeric(pure %in% TRUE))
})
summary(MHQ$SRANpure)
0 1
156476 882
# OLD
# 0 1
# 346039 39703
# SRBNpure (no comorbidity): 1 when SRBulimiaNervosa == 1 AND every other
# listed self-reported disorder == 0; otherwise 0.
# The previous version joined the "== 0" tests with "|", which flagged a
# case as pure as soon as any single other disorder was absent.
# A missing value in any tested column makes the row not pure (0).
MHQ$SRBNpure <- local({
  other_dx <- c(
    "SRAnorexiaNervosa", "SRSocPhobia", "SRSchizophrenia",
    "SRPsychosisOther", "SRPersonalityDisorder", "SROtherPhobia",
    "SRPanicAttacks", "SROCD", "SRManiaBIP", "SRDepression",
    "SRBingeEating", "SRASD", "SRGADandOthers", "SRAgoraphobia",
    "SRADHD"
  )
  none_other <- Reduce(`&`, lapply(MHQ[other_dx], function(dx) dx == 0))
  pure <- MHQ$SRBulimiaNervosa == 1 & none_other
  # %in% TRUE maps NA to FALSE
  as.factor(as.numeric(pure %in% TRUE))
})
summary(MHQ$SRBNpure)
0 1
156864 494
#OLD
# 0 1
# 345716 40026
# SRBEDpure (no comorbidity): 1 when SRBingeEating == 1 AND every other
# listed self-reported disorder == 0; otherwise 0.
# The previous version joined the "== 0" tests with "|", which flagged a
# case as pure as soon as any single other disorder was absent.
# A missing value in any tested column makes the row not pure (0).
MHQ$SRBEDpure <- local({
  other_dx <- c(
    "SRAnorexiaNervosa", "SRSocPhobia", "SRSchizophrenia",
    "SRPsychosisOther", "SRPersonalityDisorder", "SROtherPhobia",
    "SRPanicAttacks", "SROCD", "SRManiaBIP", "SRDepression",
    "SRBulimiaNervosa", "SRASD", "SRGADandOthers", "SRAgoraphobia",
    "SRADHD"
  )
  none_other <- Reduce(`&`, lapply(MHQ[other_dx], function(dx) dx == 0))
  pure <- MHQ$SRBingeEating == 1 & none_other
  # %in% TRUE maps NA to FALSE
  as.factor(as.numeric(pure %in% TRUE))
})
summary(MHQ$SRBEDpure)
0 1
156660 698
# OLD
# 0 1
# 345868 39874
Import dichotomised medication
# Read the dichotomised medication classes as a plain data.frame.
meds_orig <- fread(
  file = "data_raw/2019_02_29_BC/med_classes.txt",
  header = TRUE,
  data.table = FALSE
)
dim(meds_orig)
# Rename the ID column and several medication classes in one pass; columns
# not named in the lookup keep their names.
names(meds_orig) <- local({
  new_names <- c(
    id = "IID",
    Diabetes = "Antidiabetics",
    HIV = "AntiretroviralMed",
    Osteoporosis = "Antiosteoporotics",
    Thyroid = "Thyreostatics",
    Tuberculosis_Leprosy = "AntitubercularAntileproticMed"
  )
  current <- names(meds_orig)
  to_rename <- current %in% names(new_names)
  current[to_rename] <- unname(new_names[current[to_rename]])
  current
})
# Drop Anxiolytics here; it is supplied by the med_anx file merged below.
meds_orig$Anxiolytics <- NULL
# Convert every column except the first to a factor
meds_orig_colnames <- colnames(meds_orig)
meds_origfactor <- meds_orig_colnames[-1]
meds_orig[meds_origfactor] <- lapply(
  meds_orig[meds_origfactor],
  function(column) factor(column)
)
# Names of the numeric columns
meds_orignumeric <- c("IID")
# Read the anxiolytics table, align its ID column name and recode as factor
meds_anx <- fread(
  file = "data_raw/2019_02_29_BC/med_anx.txt",
  header = TRUE,
  data.table = FALSE
)
names(meds_anx)[names(meds_anx) == "id"] <- "IID"
meds_anx$Anxiolytics <- as.factor(meds_anx$Anxiolytics)
# Full outer join of both medication tables on their shared column names
meds <- merge(meds_orig, meds_anx, all = TRUE)
# Check structure
str(meds)
'data.frame': 502619 obs. of 16 variables:
$ IID : int 1000015 1000027 1000039 1000040 1000053 1000064 1000071 1000088 1000096 1000109 ...
$ Corticoids : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ Antidiabetics : Factor w/ 2 levels "0","1": 2 2 1 1 1 2 1 1 1 1 ...
$ Diuretics : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ Gonadotropins : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ Growth_Hormone : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ AntiretroviralMed : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ HRT_Contraceptives : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
$ Antiosteoporotics : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ Testosterone : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ Thyreostatics : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 1 2 ...
$ AntitubercularAntileproticMed: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ Antidepressants : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ Antineoplastics : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
$ Antipsychotics : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ Anxiolytics : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
Additional category for metabolic medication and psychotropic medication
# Metabolic medication: 1 when at least one of the listed medication classes
# equals 1 for the participant, otherwise 0 (including when nothing is 1 and
# some classes are missing).
meds$MetabolicMed <- local({
  metabolic_classes <- c(
    "Corticoids", "Antidiabetics", "Diuretics",
    "Gonadotropins", "Growth_Hormone", "AntiretroviralMed",
    "HRT_Contraceptives", "Antiosteoporotics", "Testosterone",
    "Thyreostatics", "AntitubercularAntileproticMed",
    "Antidepressants", "Antineoplastics",
    "Antipsychotics"
  )
  takes_any <- Reduce(
    `|`,
    lapply(meds[metabolic_classes], function(flag) flag == 1)
  )
  # %in% TRUE maps NA to FALSE
  as.factor(as.numeric(takes_any %in% TRUE))
})
summary(meds$MetabolicMed)
0 1
360327 142292
# Psychotropic medication: 1 when at least one of the listed medication
# classes equals 1 for the participant, otherwise 0 (including when nothing
# is 1 and some classes are missing).
meds$PsychotropicMed <- local({
  psychotropic_classes <- c(
    "Corticoids", "Testosterone",
    "Antidepressants", "Anxiolytics",
    "Antipsychotics"
  )
  takes_any <- Reduce(
    `|`,
    lapply(meds[psychotropic_classes], function(flag) flag == 1)
  )
  # %in% TRUE maps NA to FALSE
  as.factor(as.numeric(takes_any %in% TRUE))
})
summary(meds$PsychotropicMed)
0 1
450196 52423
# Read the diagnosis classes as a plain data.frame and report dimensions.
dx <- fread(
  file = "data_raw/2019_02_29_BC/dx_classes.txt",
  header = TRUE,
  data.table = FALSE
)
dim(dx)
[1] 502619 32
# Convert every column except the first to a factor
dx_colnames <- colnames(dx)
dxfactor <- dx_colnames[-1]
dx[dxfactor] <- lapply(dx[dxfactor], function(column) factor(column))
# Names of the numeric columns
dxnumeric <- c("IID")
# Any cancer diagnosis: 1 when DxCancer1st, DxCancer2nd or CancerRegister
# equals 1, otherwise 0 (including when none is 1 and some are missing).
dx$DxCancerAny <- with(dx, {
  any_cancer <- DxCancer1st == 1 | DxCancer2nd == 1 | CancerRegister == 1
  # %in% TRUE maps NA to FALSE
  as.factor(as.numeric(any_cancer %in% TRUE))
})
summary(dx$DxCancerAny)
0 1
429641 72978
# Any diagnosis affecting body composition: 1 when at least one of the
# listed diagnosis columns equals 1, otherwise 0 (including when none is 1
# and some are missing).
dx$DxBodyCompAny <- local({
  body_comp_dx <- c(
    "DxCancer1st", "DxCancer2nd", "CancerRegister",
    "DxConnectiveTissue1st", "DxConnectiveTissue2nd",
    "DxDiabetes1st", "DxDxdiabetes2nd",
    "DxEndocrine1st", "DxEndocrine2nd",
    "DxGlucose1st", "DxGlucose2nd",
    "DxHIV1st", "DxHIV2nd",
    "DxIBD1st", "DxIBD2nd",
    "DxIBS1st", "DxIBS2nd",
    "DxLiver1st", "DxLiver2nd",
    "DxPsy1st", "DxPsy2nd",
    "DxMetabolic1st", "DxMetabolic2nd",
    "DxMuscles1st", "DxMuscles2nd",
    "DxPancreatitis1st", "DxPancreatitis2nd",
    "DxThyroid1st", "DxThyroid2nd",
    "DxTuberculosis1st", "DxTuberculosis2nd"
  )
  has_any <- Reduce(`|`, lapply(dx[body_comp_dx], function(flag) flag == 1))
  # %in% TRUE maps NA to FALSE
  as.factor(as.numeric(has_any %in% TRUE))
})
summary(dx$DxBodyCompAny)
0 1
330752 171867
# New data frame: ID, the two "any" indicators and the cancer register flag,
# plus one merged 0/1 column per diagnosis group (1 when either of the two
# source columns equals 1).
dxmerged <- local({
  source_pairs <- list(
    DxConnectiveTissue = c("DxConnectiveTissue1st", "DxConnectiveTissue2nd"),
    DxDiabetes = c("DxDiabetes1st", "DxDxdiabetes2nd"),
    DxEndocrine = c("DxEndocrine1st", "DxEndocrine2nd"),
    DxGlucose = c("DxGlucose1st", "DxGlucose2nd"),
    DxHIV = c("DxHIV1st", "DxHIV2nd"),
    DxIBD = c("DxIBD1st", "DxIBD2nd"),
    DxIBS = c("DxIBS1st", "DxIBS2nd"),
    DxLiver = c("DxLiver1st", "DxLiver2nd"),
    DxPsy = c("DxPsy1st", "DxPsy2nd"),
    DxMetabolic = c("DxMetabolic1st", "DxMetabolic2nd"),
    DxMuscles = c("DxMuscles1st", "DxMuscles2nd"),
    DxPancreatitis = c("DxPancreatitis1st", "DxPancreatitis2nd"),
    DxThyroid = c("DxThyroid1st", "DxThyroid2nd"),
    DxTuberculosis = c("DxTuberculosis1st", "DxTuberculosis2nd")
  )
  combined <- as.data.frame(
    dx[, c("IID", "DxCancerAny", "DxBodyCompAny", "CancerRegister")]
  )
  combined[names(source_pairs)] <- lapply(
    source_pairs,
    function(pair) ifelse(dx[[pair[1]]] == 1 | dx[[pair[2]]] == 1, 1, 0)
  )
  combined
})
# Recode as factor: every column except the third.
# NOTE(review): dropping index 3 (DxBodyCompAny, already a factor) means IID
# is converted to a factor too; the other tables use [-1] to skip the ID
# column. Confirm whether a factor IID is intended here.
dxmerged_colnames <- colnames(dxmerged)
dxmergedfactor <- dxmerged_colnames[-3]
dxmerged[dxmergedfactor] <- lapply(
  dxmerged[dxmergedfactor],
  function(column) factor(column)
)
summary(dxmerged)
IID DxCancerAny DxBodyCompAny CancerRegister
1000015: 1 0:429641 0:330752 0:437662
1000027: 1 1: 72978 1:171867 1: 64957
1000039: 1
1000040: 1
1000053: 1
1000064: 1
(Other):502613
DxConnectiveTissue DxDiabetes DxEndocrine DxGlucose DxHIV
0:498947 0:476606 0:499402 0:501410 0:502408
1: 3672 1: 26013 1: 3217 1: 1209 1: 211
DxIBD DxIBS DxLiver DxPsy DxMetabolic DxMuscles
0:479134 0:495942 0:496401 0:463308 0:447983 0:501216
1: 23485 1: 6677 1: 6218 1: 39311 1: 54636 1: 1403
DxPancreatitis DxThyroid DxTuberculosis
0:501978 0:481757 0:502334
1: 641 1: 20862 1: 285
# Read the ICD chapter F table as a plain data.frame and report dimensions.
ICDF <- fread(
  file = "data_raw/2019_03_02_EDcases/ICD_F.txt",
  header = TRUE,
  data.table = FALSE
)
dim(ICDF)
[1] 502619 525
# Keep the ID plus every column whose name contains "F50".
ICDEDraw <- select(ICDF, IID, contains("F50"))
# For each F50 code, combine the "m" and "s" source columns into one 0/1
# column (1 when either equals 1), then keep IID and the columns whose
# names end in a digit, i.e. the combined ones.
ICDED <- local({
  f50_codes <- c("F500", "F501", "F502", "F505", "F508", "F509")
  combined <- ICDEDraw
  combined[f50_codes] <- lapply(f50_codes, function(code) {
    main <- combined[[paste0(code, "m")]]
    secondary <- combined[[paste0(code, "s")]]
    ifelse(main == 1 | secondary == 1, 1, 0)
  })
  select(combined, IID, matches("\\d$"))
})
rm(ICDEDraw)
# Recode every column as factor.
# NOTE(review): this includes IID, which therefore becomes a factor with one
# level per participant - confirm that this is intended.
ICDEDfactor <- colnames(ICDED)
ICDED[ICDEDfactor] <- lapply(ICDED[ICDEDfactor], function(column) factor(column))
# Check structure
str(ICDED)
'data.frame': 502619 obs. of 7 variables:
$ IID : Factor w/ 502619 levels "1000015","1000027",..: 1 2 3 4 5 6 7 8 9 10 ...
$ F500: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ F501: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ F502: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ F505: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ F508: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ F509: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
# Level counts for every column of the eating-disorder ICD table
summary(ICDED)
IID F500 F501 F502 F505 F508
1000015: 1 0:502560 0:502616 0:502593 0:502616 0:502613
1000027: 1 1: 59 1: 3 1: 26 1: 3 1: 6
1000039: 1
1000040: 1
1000053: 1
1000064: 1
(Other):502613
F509
0:502588
1: 31
# Full outer join of the merged diagnoses and the F50 ICD codes.
# The key is named explicitly; without `by`, merge() joins on every column
# name the two tables share, so a column added to both later on would
# silently become part of the key.
dxICD <- merge(dxmerged, ICDED, by = "IID", all = TRUE, sort = FALSE)
# Read the self-reported illness columns as a plain data.frame and report
# dimensions.
SRillness <- fread(
  input = "data_raw/2019_02_29_BC/SRillness_columns.txt",
  header = TRUE,
  data.table = FALSE
)
dim(SRillness)
[1] 502618 88
# Rename the ID column to IID
names(SRillness)[names(SRillness) == "f.eid"] <- "IID"
# Codes of the self-reported illnesses that count towards the exclusion flags
SRillness_exclusion <- c(
  1136, 1154, 1155, 1156, 1157, 1158, 1164, 1165, 1192,
  1193, 1194, 1220, 1222, 1223, 1224, 1225, 1226, 1228,
  1229, 1230, 1232, 1233, 1234, 1235, 1236, 1237, 1238,
  1239, 1243, 1252, 1259, 1260, 1262, 1263, 1276, 1286,
  1287, 1289, 1290, 1291, 1293, 1297, 1308, 1309, 1310,
  1313, 1322, 1350, 1373, 1376, 1377, 1378, 1379, 1380,
  1381, 1382, 1383, 1384, 1403, 1404, 1408, 1409, 1410,
  1428, 1429, 1430, 1431, 1432, 1437, 1439, 1440, 1456,
  1461, 1462, 1463, 1464, 1468, 1469, 1470, 1477, 1480,
  1481, 1519, 1520, 1521, 1522, 1531, 1556, 1579, 1580,
  1604, 1607, 1608, 1609, 1611, 1615, 1617, 1657, 1664, 1682
)
# Flag participants who reported any illness on the exclusion list.
# Any visit: 1 when at least one f.20002 entry from visit 0 (slots 0-28),
# visit 1 (slots 0-15) or visit 2 (slots 0-16) is an exclusion code,
# otherwise 0. %in% never returns NA, so empty slots simply do not match.
SRillness$SRAnyIllnessBC <- local({
  illness_cols <- c(
    paste0("f.20002.0.", 0:28),
    paste0("f.20002.1.", 0:15),
    paste0("f.20002.2.", 0:16)
  )
  reported <- Reduce(
    `|`,
    lapply(SRillness[illness_cols], function(codes) codes %in% SRillness_exclusion)
  )
  as.factor(ifelse(reported, 1, 0))
})
summary(SRillness$SRAnyIllnessBC)
0 1
374256 128362
# Baseline visit .0: 1 when at least one f.20002.0 entry (slots 0-28) is an
# exclusion code, otherwise 0. %in% never returns NA, so empty slots simply
# do not match.
SRillness$SRAnyIllnessBC.0 <- local({
  illness_cols <- paste0("f.20002.0.", 0:28)
  reported <- Reduce(
    `|`,
    lapply(SRillness[illness_cols], function(codes) codes %in% SRillness_exclusion)
  )
  as.factor(ifelse(reported, 1, 0))
})
summary(SRillness$SRAnyIllnessBC.0)
0 1
378244 124374
# First repeat .1: 1 when at least one f.20002.1 entry (slots 0-15) is an
# exclusion code, otherwise 0. %in% never returns NA, so empty slots simply
# do not match.
# Slot f.20002.1.0 was previously left out of this flag although the
# any-visit flag checks it; it is now included.
SRillness$SRAnyIllnessBC.1 <- local({
  illness_cols <- paste0("f.20002.1.", 0:15)
  reported <- Reduce(
    `|`,
    lapply(SRillness[illness_cols], function(codes) codes %in% SRillness_exclusion)
  )
  as.factor(ifelse(reported, 1, 0))
})
summary(SRillness$SRAnyIllnessBC.1)
0 1
498211 4407
# Second repeat .2: 1 when at least one f.20002.2 entry (slots 0-16) is an
# exclusion code, otherwise 0. %in% never returns NA, so empty slots simply
# do not match.
SRillness$SRAnyIllnessBC.2 <- local({
  illness_cols <- paste0("f.20002.2.", 0:16)
  reported <- Reduce(
    `|`,
    lapply(SRillness[illness_cols], function(codes) codes %in% SRillness_exclusion)
  )
  as.factor(ifelse(reported, 1, 0))
})
summary(SRillness$SRAnyIllnessBC.2)
0 1
498903 3715
Self-reported psychiatric disorder
# Codes of the self-reported psychiatric illnesses
SRPsy_exclusion <- c(
  1286, # depression
  1287, # anxiety, panic attacks
  1289, # schizophrenia
  1290, # self-harm, suicide attempts
  1291, # mania, bipolar
  1408, # alcohol
  1409, # opioid
  1410, # substance
  1469, # PTSD
  1470, # AN, BN, OED
  1615  # OCD
)
SRillness$SRAnyPsyIllnessBC <-
with(SRillness, ifelse(!(f.20002.0.0 %in% SRPsy_exclusion) & !(f.20002.0.1 %in% SRPsy_exclusion) &
!(f.20002.0.2 %in% SRPsy_exclusion) & !(f.20002.0.3 %in% SRPsy_exclusion) &
!(f.20002.0.4 %in% SRPsy_exclusion) & !(f.20002.0.5 %in% SRPsy_exclusion) &
!(f.20002.0.6 %in% SRPsy_exclusion) & !(f.20002.0.7 %in% SRPsy_exclusion) &
!(f.20002.0.8 %in% SRPsy_exclusion) & !(f.20002.0.9 %in% SRPsy_exclusion) &
!(f.20002.0.10 %in% SRPsy_exclusion) & !(f.20002.0.11 %in% SRPsy_exclusion) &
!(f.20002.0.12 %in% SRPsy_exclusion) & !(f.20002.0.13 %in% SRPsy_exclusion) &
!(f.20002.0.14 %in% SRPsy_exclusion) & !(f.20002.0.15 %in% SRPsy_exclusion) &
!(f.20002.0.16 %in% SRPsy_exclusion) & !(f.20002.0.17 %in% SRPsy_exclusion) &
!(f.20002.0.18 %in% SRPsy_exclusion) & !(f.20002.0.19 %in% SRPsy_exclusion) &
!(f.20002.0.20 %in% SRPsy_exclusion) & !(f.20002.0.21 %in% SRPsy_exclusion) &
!(f.20002.0.22 %in% SRPsy_exclusion) & !(f.20002.0.23 %in% SRPsy_exclusion) &
!(f.20002.0.24 %in% SRPsy_exclusion) & !(f.20002.0.25 %in% SRPsy_exclusion) &
!(f.20002.0.26 %in% SRPsy_exclusion) & !(f.20002.0.27 %in% SRPsy_exclusion) &
!(f.20002.0.28 %in% SRPsy_exclusion) &
!(f.20002.1.0 %in% SRPsy_exclusion) &
!(f.20002.1.1 %in% SRPsy_exclusion) & !(f.20002.1.2 %in% SRPsy_exclusion) &
!(f.20002.1.3 %in% SRPsy_exclusion) & !(f.20002.1.4 %in% SRPsy_exclusion) &
!(f.20002.1.5 %in% SRPsy_exclusion) & !(f.20002.1.6 %in% SRPsy_exclusion) &
!(f.20002.1.7 %in% SRPsy_exclusion) & !(f.20002.1.8 %in% SRPsy_exclusion) &
!(f.20002.1.9 %in% SRPsy_exclusion) & !(f.20002.1.10 %in% SRPsy_exclusion) &
!(f.20002.1.11 %in% SRPsy_exclusion) & !(f.20002.1.12 %in% SRPsy_exclusion) &
!(f.20002.1.13 %in% SRPsy_exclusion) & !(f.20002.1.14 %in% SRPsy_exclusion) &
!(f.20002.1.15 %in% SRPsy_exclusion) &
!(f.20002.2.0 %in% SRPsy_exclusion) &
!(f.20002.2.1 %in% SRPsy_exclusion) & !(f.20002.2.2 %in% SRPsy_exclusion) &
!(f.20002.2.3 %in% SRPsy_exclusion) & !(f.20002.2.4 %in% SRPsy_exclusion) &
!(f.20002.2.5 %in% SRPsy_exclusion) & !(f.20002.2.6 %in% SRPsy_exclusion) &
!(f.20002.2.7 %in% SRPsy_exclusion) & !(f.20002.2.8 %in% SRPsy_exclusion) &
!(f.20002.2.9 %in% SRPsy_exclusion) & !(f.20002.2.10 %in% SRPsy_exclusion) &
!(f.20002.2.11 %in% SRPsy_exclusion) & !(f.20002.2.12 %in% SRPsy_exclusion) &
!(f.20002.2.13 %in% SRPsy_exclusion) & !(f.20002.2.14 %in% SRPsy_exclusion) &
!(f.20002.2.15 %in% SRPsy_exclusion) & !(f.20002.2.16 %in% SRPsy_exclusion),
0, 1))
SRillness$SRAnyPsyIllness <- as.factor(SRillness$SRAnyPsyIllness)
summary(SRillness$SRAnyPsyIllness)
0 1
464923 37695
Self-reported neurological disorder
# Self-reported neurological disorder
# Self-reported illness codes that count as a neurological disorder
SRNeuro_exclusion <- c(
  1082, 1083, 1086, 1524, 1262, 1397, 1683, 1245, 1246,
  1491, 1425, 1433, 1258, 1263, 1264, 1266, 1244, 1583,
  1659, 1259, 1240, 1434
)
# Numeric flag: 1 if any of the inspected f.20002 columns
# (f.20002.0.0-28, f.20002.1.0-15, f.20002.2.0-16) holds one of the
# neurological codes, 0 otherwise. %in% never returns NA, so missing
# entries simply count as "code not present".
SRillness$SRAnyNeuroIllnessBC <- local({
  illness_cols <- c(
    paste0("f.20002.0.", 0:28),
    paste0("f.20002.1.", 0:15),
    paste0("f.20002.2.", 0:16)
  )
  has_code <- Reduce(
    `|`,
    lapply(illness_cols, function(col) SRillness[[col]] %in% SRNeuro_exclusion)
  )
  ifelse(has_code, 1, 0)
})
# Factor copy under the name used downstream. The source column is named
# explicitly: `SRillness$SRAnyNeuroIllness` on the right-hand side would only
# resolve through `$` partial matching onto SRAnyNeuroIllnessBC.
SRillness$SRAnyNeuroIllness <- as.factor(SRillness$SRAnyNeuroIllnessBC)
summary(SRillness$SRAnyNeuroIllness)
0 1
489801 12817
# Reduced table: participant ID plus the derived self-reported illness flags
SRillness_red <- select(
  .data = SRillness,
  IID,
  SRAnyIllnessBC,
  SRAnyIllnessBC.0, SRAnyIllnessBC.1, SRAnyIllnessBC.2,
  SRAnyPsyIllness,
  SRAnyNeuroIllness
)
Self-reported CVD illness
# Read the self-reported cardiovascular columns as a plain data.frame
SRCVD <- fread(
  input = "data_raw/2019_02_29_BC/CVD_columns.txt",
  data.table = FALSE,
  header = TRUE
)
dim(SRCVD)
[1] 502618 13
# Rename ID column
colnames(SRCVD)[colnames(SRCVD) == "f.eid"] <- "IID"
# Answer columns: everything except the first (ID) column
SRCVD_cols <- colnames(SRCVD[, -1])
# One single-level factor per condition: "1" when any answer column holds
# the condition's code, NA otherwise (there is deliberately no "0" level,
# matching the summary printed below).
# `levels = TRUE` is pinned explicitly: with implicit levels,
# factor(x, labels = 1) turns into levels "11"/"12" as soon as a single
# row evaluates to FALSE. rowSums(..., na.rm = TRUE) > 0 is TRUE exactly
# when at least one column matches, without a per-row apply().
# Code 1: heart attack
SRCVD$SRHeartAttack <- factor(
  rowSums(SRCVD[, SRCVD_cols, drop = FALSE] == 1, na.rm = TRUE) > 0,
  levels = TRUE, labels = "1"
)
# Code 2: angina
SRCVD$SRAngina <- factor(
  rowSums(SRCVD[, SRCVD_cols, drop = FALSE] == 2, na.rm = TRUE) > 0,
  levels = TRUE, labels = "1"
)
# Code 3: stroke
SRCVD$SRStroke <- factor(
  rowSums(SRCVD[, SRCVD_cols, drop = FALSE] == 3, na.rm = TRUE) > 0,
  levels = TRUE, labels = "1"
)
# Code 4: high blood pressure
SRCVD$SRHighBloodPressure <- factor(
  rowSums(SRCVD[, SRCVD_cols, drop = FALSE] == 4, na.rm = TRUE) > 0,
  levels = TRUE, labels = "1"
)
# Reduced table: participant ID plus the four self-reported CVD flags
SRCVD_red <- select(
  .data = SRCVD,
  IID, SRHeartAttack, SRAngina, SRStroke, SRHighBloodPressure
)
summary(SRCVD_red)
IID SRHeartAttack SRAngina SRStroke
Min. :1000015 1 : 11849 1 : 16490 1 : 7901
1st Qu.:2256563 NA's:490769 NA's:486128 NA's:494717
Median :3513112
Mean :3513108
3rd Qu.:4769654
Max. :6026196
SRHighBloodPressure
1 :138100
NA's:364518
# Read the self-reported cancer columns as a plain data.frame
SRcancer <- fread(
  input = "data_raw/2019_02_29_BC/SRcancer.txt",
  data.table = FALSE,
  header = TRUE
)
# All columns except the first are cancer indicator columns
SRcancer_cols <- colnames(SRcancer[, -1])
# Any self-reported cancer: TRUE when at least one indicator equals 1,
# stored as a factor labelled 0 / 1
SRcancer$SRAnyCancer <- factor(
  apply(X = SRcancer[, SRcancer_cols] == 1, MARGIN = 1, FUN = any),
  labels = c(0, 1)
)
summary(SRcancer$SRAnyCancer)
0 1
459285 43333
# Self-reported cancer codes that are exclusion criteria for body composition
SRcancerBC_exclusion <- c(
  1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010,
  1011, 1012, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022,
  1023, 1024, 1025, 1026, 1027, 1028, 1031, 1032, 1033, 1034,
  1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044,
  1045, 1046, 1047, 1048, 1050, 1051, 1052, 1053, 1055, 1056,
  1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067,
  1068, 1070, 1071, 1073, 1074, 1075, 1076, 1077, 1078, 1079,
  1080, 1081, 1082, 1084, 1085, 1086, 1087, 1088
)
# Keep IID plus every column whose name matches one of the codes
# (the codes are joined into a single "a|b|c" regular expression)
SRcancerBC <- SRcancer %>%
  select(IID, matches(paste(SRcancerBC_exclusion, collapse = "|")))
# 0 / 1 factor: does any of the selected columns equal 1?
SRcancer$SRAnyCancerBC <- factor(
  apply(X = SRcancerBC[, -1] == 1, MARGIN = 1, FUN = any),
  labels = c(0, 1)
)
summary(SRcancer$SRAnyCancerBC)
0 1
461639 40979
# Vector with codes for self-reported cancer to exclude for neurology
SRcancerNeuro_exclusion <- c(1031, 1032)
# Keep IID plus every column whose name matches one of the codes
SRcancerNeuro <- select(
  .data = SRcancer,
  IID, matches(paste(SRcancerNeuro_exclusion, collapse = "|"))
)
# Any matching indicator equal to 1?
# drop = FALSE keeps a one-column selection two-dimensional, so apply()
# still works if only a single code column is present.
SRcancer$SRAnyCancerNeuro <- apply(
  SRcancerNeuro[, -1, drop = FALSE] == 1, 1, any
)
# Recode as 0 / 1 factor. Levels are given explicitly so the labels stay
# aligned (and the call does not fail) when only one of FALSE / TRUE occurs.
SRcancer$SRAnyCancerNeuro <- factor(
  SRcancer$SRAnyCancerNeuro,
  levels = c(FALSE, TRUE), labels = c(0, 1)
)
summary(SRcancer$SRAnyCancerNeuro)
0 1
502351 267
# Reduced cancer tables
SRcancer_red <- select(
  .data = SRcancer,
  "IID", "SRAnyCancer", "SRAnyCancerBC", "SRAnyCancerNeuro"
)
SRcancerBC_red <- select(.data = SRcancer, "IID", "SRAnyCancerBC")
# Both columns of SRcancerBC_red also exist in SRcancer_red, so this merge
# joins on IID and SRAnyCancerBC together and places those two columns first.
SRcancer_merge <- merge(
  x = SRcancer_red, y = SRcancerBC_red,
  sort = FALSE, all = TRUE
)
str(SRcancer_merge)
'data.frame': 502618 obs. of 4 variables:
$ IID : int 1000015 1000027 1000039 1000040 1000053 1000064 1000071 1000088 1000096 1000109 ...
$ SRAnyCancerBC : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
$ SRAnyCancer : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
$ SRAnyCancerNeuro: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
# Read the ICD-based autoimmune / inflammatory table as a plain data.frame
SRautoimmune.inflammatory <- fread(
  input = "inflammatory_autoimmune/ICD.dx.txt",
  data.table = FALSE,
  header = TRUE
)
Merge UKB phenotypes with MHQ, medication (meds), and ICD diagnoses (dxICD)
dim(UKBpheno_merged)
[1] 502618 44
dim(MHQ)
[1] 157358 68
# Left join on the shared column(s): every row of UKBpheno_merged is kept
UKBpheno_MHQ <- merge(
  x = UKBpheno_merged, y = MHQ,
  sort = FALSE, all.x = TRUE
)
dim(UKBpheno_MHQ)
[1] 502618 111
dim(meds)
[1] 502619 18
# Add the medication columns (left join), then drop the previous
# intermediate table
UKBpheno_MHQ_meds <- merge(
  x = UKBpheno_MHQ, y = meds,
  sort = FALSE, all.x = TRUE
)
rm(UKBpheno_MHQ)
dim(UKBpheno_MHQ_meds)
[1] 502618 128
dim(dxICD)
[1] 502619 24
# Add the ICD diagnosis columns (left join), then drop the previous
# intermediate table
UKBpheno_MHQ_meds_dx <- merge(
  x = UKBpheno_MHQ_meds, y = dxICD,
  sort = FALSE, all.x = TRUE
)
rm(UKBpheno_MHQ_meds)
dim(UKBpheno_MHQ_meds_dx)
[1] 502618 151
dim(SRillness_red)
[1] 502618 7
# Add the self-reported illness flags (left join), then drop the previous
# intermediate table
UKBpheno_MHQ_meds_dx_SRillness <- merge(
  x = UKBpheno_MHQ_meds_dx, y = SRillness_red,
  sort = FALSE, all.x = TRUE
)
rm(UKBpheno_MHQ_meds_dx)
dim(UKBpheno_MHQ_meds_dx_SRillness)
[1] 502618 157
dim(SRcancer_merge)
[1] 502618 4
# Add the self-reported cancer flags (left join), then drop the previous
# intermediate table
UKBpheno_MHQ_meds_dx_SRillness_SRcancer <- merge(
  x = UKBpheno_MHQ_meds_dx_SRillness, y = SRcancer_merge,
  sort = FALSE, all.x = TRUE
)
rm(UKBpheno_MHQ_meds_dx_SRillness)
dim(UKBpheno_MHQ_meds_dx_SRillness_SRcancer)
[1] 502618 160
dim(SRCVD_red)
[1] 502618 5
# Add the self-reported CVD flags (left join), then drop the previous
# intermediate table
UKBpheno_MHQ_meds_dx_SRillness_SRcancer_SRCVD <- merge(
  x = UKBpheno_MHQ_meds_dx_SRillness_SRcancer, y = SRCVD_red,
  sort = FALSE, all.x = TRUE
)
rm(UKBpheno_MHQ_meds_dx_SRillness_SRcancer)
dim(UKBpheno_MHQ_meds_dx_SRillness_SRcancer_SRCVD)
[1] 502618 164
dim(SRCVD_red)
[1] 502618 5
# Add the autoimmune / inflammatory columns (left join), then drop the
# previous intermediate table.
# NOTE(review): the dim() call just before this merge inspects SRCVD_red
# again rather than SRautoimmune.inflammatory, so the size of the table
# merged here is never printed.
UKBpheno_MHQ_meds_dx_SRillness_SRcancer_SRCVD_SRautoimmune <- merge(
  x = UKBpheno_MHQ_meds_dx_SRillness_SRcancer_SRCVD,
  y = SRautoimmune.inflammatory,
  sort = FALSE, all.x = TRUE
)
rm(UKBpheno_MHQ_meds_dx_SRillness_SRcancer_SRCVD)
# Short working name for the fully merged phenotype table
pheno <- UKBpheno_MHQ_meds_dx_SRillness_SRcancer_SRCVD_SRautoimmune
# Numeric copies ("<name>.numeric") of the eating-disorder indicators, as
# needed for the upset plots. Going through as.character() converts the
# level labels rather than the internal level codes.
# All nine columns are converted with as.numeric(); previously F500 alone
# was converted with as.integer(), leaving one column with a different type.
ED.source.cols <- c(
  "SRAnorexiaNervosa",
  "F500", "F501", "F502", "F505", "F508", "F509",
  "SRBulimiaNervosa", "SRBingeEating"
)
pheno[paste0(ED.source.cols, ".numeric")] <- lapply(
  pheno[ED.source.cols],
  function(x) as.numeric(as.character(x))
)
# Names of the numeric indicator columns used as sets in the upset plots
# All eating-disorder indicators (self-report and ICD)
ED.orig.numeric <- paste0(
  c(
    "SRAnorexiaNervosa", "SRBulimiaNervosa", "SRBingeEating",
    "F500", "F501", "F502", "F505", "F508", "F509"
  ),
  ".numeric"
)
# Anorexia nervosa indicators
AN.orig.numeric <- paste0(c("SRAnorexiaNervosa", "F500", "F501"), ".numeric")
# Bulimia nervosa indicators
BN.orig.numeric <- paste0(c("SRBulimiaNervosa", "F502"), ".numeric")
# Upset plot over the whole phenotype table
upset(data = pheno, sets = ED.orig.numeric)
Warning: Removed 3 rows containing missing values (geom_bar).
# Keep only the indicator columns and only the rows where at least one
# indicator equals 1
EDdataframe <- filter_at(
  select(pheno, ED.orig.numeric),
  vars(ED.orig.numeric),
  any_vars(. == 1)
)
# Treat the remaining missing indicators as "not present"
EDdataframe <- replace(EDdataframe, is.na(EDdataframe), 0)
colnames(EDdataframe)
[1] "SRAnorexiaNervosa.numeric" "SRBulimiaNervosa.numeric"
[3] "SRBingeEating.numeric" "F500.numeric"
[5] "F501.numeric" "F502.numeric"
[7] "F505.numeric" "F508.numeric"
[9] "F509.numeric"
# Overlap among the anorexia nervosa indicators
upset(sets = AN.orig.numeric, data = EDdataframe)
# Overlap among the bulimia nervosa indicators
upset(sets = BN.orig.numeric, data = EDdataframe)
# Overlap among all eating-disorder indicators
upset(sets = ED.orig.numeric, data = EDdataframe)
# Same plot written to a dated PDF
pdf(
  file = paste0("plots/upset_eds_ukb_",date,".pdf"),
  width = 10, height = 5, onefile = FALSE
)
upset(sets = ED.orig.numeric, data = EDdataframe)
dev.off()
quartz_off_screen
2
Eating disorders crosstables
summary(pheno$SRAnorexiaNervosa)
0 1 NA's
156467 891 345260
summary(pheno$F500)
0 1
502559 59
summary(pheno$F501)
0 1
502615 3
ftable(pheno$SRAnorexiaNervosa, pheno$F500, pheno$F501,
exclude = NULL)
0 1
0 0 156465 0
1 2 0
1 0 877 0
1 12 2
NA 0 345216 1
1 43 0
ftable(pheno$SRBulimiaNervosa, pheno$F502,
exclude = NULL)
0 1
0 156855 0
1 496 7
NA 345241 19
# Per-disorder indicators: 1 when any contributing column equals 1, 0 when
# the comparison is FALSE, NA when it cannot be decided because of missing
# values.
# Anorexia nervosa (self-report, F500, F501)
pheno$AN.count <- as.numeric(
  with(pheno, SRAnorexiaNervosa == 1 | F500 == 1 | F501 == 1)
)
# Bulimia nervosa (self-report, F502)
pheno$BN.count <- as.numeric(with(pheno, SRBulimiaNervosa == 1 | F502 == 1))
# Binge-eating disorder (self-report)
pheno$BED.count <- as.numeric(with(pheno, SRBingeEating == 1))
# EDNOS (F509)
pheno$EDNOS.count <- as.numeric(with(pheno, F509 == 1))
# Vomiting (Purging disorder) (F505)
pheno$PUR.count <- as.numeric(with(pheno, F505 == 1))
# Pica (F508)
pheno$Pica.count <- as.numeric(with(pheno, F508 == 1))
# "<name>.count.numeric" copies of the six indicators
ED.count.cols <- paste0(c("AN", "BN", "BED", "EDNOS", "PUR", "Pica"), ".count")
pheno[paste0(ED.count.cols, ".numeric")] <- lapply(
  pheno[ED.count.cols],
  function(x) as.numeric(as.character(x))
)
# Case/control variables per disorder: the .count indicator, except that
# non-cases (count == 0) who have any other eating-disorder indicator are
# set to NA instead of 0.
# Anorexia nervosa
pheno$AN <- with(pheno, {
  other_ed <- SRBulimiaNervosa == 1 | F502 == 1 |
    SRBingeEating == 1 |
    F505 == 1 | F508 == 1 | F509 == 1
  ifelse(AN.count == 0 & other_ed, NA, AN.count)
})
pheno$AN.numeric <- as.numeric(as.character(pheno$AN))
# Bulimia nervosa
pheno$BN <- with(pheno, {
  other_ed <- SRAnorexiaNervosa == 1 | F500 == 1 | F501 == 1 |
    SRBingeEating == 1 |
    F505 == 1 | F508 == 1 | F509 == 1
  ifelse(BN.count == 0 & other_ed, NA, BN.count)
})
pheno$BN.numeric <- as.numeric(as.character(pheno$BN))
# Binge-eating disorder
pheno$BED <- with(pheno, {
  other_ed <- SRAnorexiaNervosa == 1 | F500 == 1 | F501 == 1 |
    SRBulimiaNervosa == 1 | F502 == 1 |
    F505 == 1 | F508 == 1 | F509 == 1
  ifelse(BED.count == 0 & other_ed, NA, BED.count)
})
pheno$BED.numeric <- as.numeric(as.character(pheno$BED))
# "Pure" anorexia nervosa: AN indicated and no other eating-disorder
# indicator present. Missing self-reports of the other disorders are
# accepted as "not reported".
pheno$ANpure.count <- with(pheno, {
  has_an <- SRAnorexiaNervosa == 1 | F500 == 1 | F501 == 1 # self-reported or ICD AN
  no_sr_bn <- is.na(SRBulimiaNervosa) | SRBulimiaNervosa == 0 # no self-reported BN (or missing)
  no_sr_bed <- is.na(SRBingeEating) | SRBingeEating == 0 # no self-reported BED (or missing)
  # no ICD bulimia nervosa, purging / vomiting, Pica or EDNOS
  no_other_icd <- F502 == 0 & F505 == 0 & F508 == 0 & F509 == 0
  ifelse(has_an & no_sr_bn & no_other_icd & no_sr_bed, 1, 0)
})
# Non-cases with any other eating-disorder indicator become NA, not 0
pheno$ANpure <- with(pheno, {
  other_ed <- SRBulimiaNervosa == 1 | F502 == 1 |
    SRBingeEating == 1 |
    F505 == 1 | F508 == 1 | F509 == 1
  ifelse(ANpure.count == 0 & other_ed, NA, ANpure.count)
})
pheno$ANpure.numeric <- as.numeric(as.character(pheno$ANpure))
# "Pure" bulimia nervosa: BN indicated and no other eating-disorder
# indicator present. Missing self-reports of the other disorders are
# accepted as "not reported".
pheno$BNpure.count <- with(pheno, {
  has_bn <- SRBulimiaNervosa == 1 | F502 == 1 # self-reported or ICD BN
  no_sr_an <- is.na(SRAnorexiaNervosa) | SRAnorexiaNervosa == 0 # no self-reported AN (or missing)
  no_icd_an <- F500 == 0 & F501 == 0 # no ICD anorexia nervosa
  no_sr_bed <- is.na(SRBingeEating) | SRBingeEating == 0 # no self-reported BED (or missing)
  # no purging / vomiting, Pica or EDNOS
  no_other_icd <- F505 == 0 & F508 == 0 & F509 == 0
  ifelse(has_bn & no_sr_an & no_icd_an & no_sr_bed & no_other_icd, 1, 0)
})
# Non-cases with any other eating-disorder indicator become NA, not 0
pheno$BNpure <- with(pheno, {
  other_ed <- SRAnorexiaNervosa == 1 | F500 == 1 | F501 == 1 |
    SRBingeEating == 1 |
    F505 == 1 | F508 == 1 | F509 == 1
  ifelse(BNpure.count == 0 & other_ed, NA, BNpure.count)
})
pheno$BNpure.numeric <- as.numeric(as.character(pheno$BNpure))
# "Pure" binge-eating disorder: BED self-reported and no other
# eating-disorder indicator present. Missing self-reports of the other
# disorders are accepted as "not reported".
pheno$BEDpure.count <- with(pheno, {
  has_bed <- SRBingeEating == 1 # self-reported BED
  no_sr_an <- is.na(SRAnorexiaNervosa) | SRAnorexiaNervosa == 0 # no self-reported AN (or missing)
  no_icd_an <- F500 == 0 & F501 == 0 # no ICD anorexia nervosa
  no_sr_bn <- is.na(SRBulimiaNervosa) | SRBulimiaNervosa == 0 # no self-reported BN (or missing)
  # no ICD bulimia nervosa, purging / vomiting, Pica or EDNOS
  no_other_icd <- F502 == 0 & F505 == 0 & F508 == 0 & F509 == 0
  ifelse(has_bed & no_sr_an & no_icd_an & no_sr_bn & no_other_icd, 1, 0)
})
# Non-cases with any other eating-disorder indicator become NA, not 0
pheno$BEDpure <- with(pheno, {
  other_ed <- SRAnorexiaNervosa == 1 | F500 == 1 | F501 == 1 |
    SRBulimiaNervosa == 1 | F502 == 1 |
    F505 == 1 | F508 == 1 | F509 == 1
  ifelse(BEDpure.count == 0 & other_ed, NA, BEDpure.count)
})
pheno$BEDpure.numeric <- as.numeric(as.character(pheno$BEDpure))
# Any eating disorder: 1 when any AN, BN, BED, EDNOS or purging indicator
# equals 1, 0 otherwise; rows with Pica (F508 == 1) are set to NA.
pheno$ED <- with(pheno, {
  any_ed <-
    SRAnorexiaNervosa == 1 | F500 == 1 | F501 == 1 | # Anorexia nervosa (SR, ICD)
    SRBulimiaNervosa == 1 | F502 == 1 |              # Bulimia nervosa (SR, ICD)
    SRBingeEating == 1 |                             # Binge-eating disorder (SR)
    F509 == 1 |                                      # EDNOS
    F505 == 1                                        # Vomiting (Purging disorder)
  ifelse(F508 == 1, NA, ifelse(any_ed, 1, 0))
})
# 1 when any of the AN / BN / BED count indicators equals 1
pheno$ED.control <- ifelse(
  pheno$AN.count == 1 | pheno$BN.count == 1 | pheno$BED.count == 1,
  1, 0
)
summary(as.factor(pheno$ED.control))
0 1 NA's
155506 1912 345200
# Eating-disorder columns to store as factors
EDcols <- c(
  paste0(c("AN", "BN", "BED", "EDNOS", "PUR", "Pica"), ".count"),
  "AN", "BN", "BED",
  paste0(c("AN", "BN", "BED"), "pure"),
  "ED",
  "SRAnorexiaNervosa", "SRBulimiaNervosa", "SRBingeEating",
  "SREatingDisorderAny",
  paste0("SR", c("AN", "BN", "BED"), "pure")
)
# Convert each listed column with factor()
pheno[EDcols] <- lapply(EDcols, function(col) factor(pheno[[col]]))
summary(ICDED)
IID F500 F501 F502 F505 F508
1000015: 1 0:502560 0:502616 0:502593 0:502616 0:502613
1000027: 1 1: 59 1: 3 1: 26 1: 3 1: 6
1000039: 1
1000040: 1
1000053: 1
1000064: 1
(Other):502613
F509
0:502588
1: 31
# Original and recoded numeric eating-disorder indicators
EDcols.recoded.numeric <- paste0(
  c(
    "SRAnorexiaNervosa", "SRBulimiaNervosa", "SRBingeEating",
    "F500", "F501", "F502", "F505", "F508", "F509",
    "AN", "BN", "BED",
    "ANpure", "BNpure", "BEDpure"
  ),
  ".numeric"
)
# Rows with at least one indicator equal to 1; remaining NAs become 0
EDrecoded <- filter_at(
  select(pheno, EDcols.recoded.numeric),
  vars(EDcols.recoded.numeric),
  any_vars(. == 1)
)
EDrecoded <- replace(EDrecoded, is.na(EDrecoded), 0)
upset(sets = EDcols.recoded.numeric, data = EDrecoded)
# Same plot written to a dated PDF
pdf(
  file = paste0("plots/upset_eds_ukb_recoded",date,".pdf"),
  width = 10, height = 5, onefile = FALSE
)
upset(sets = EDcols.recoded.numeric, data = EDrecoded)
dev.off()
quartz_off_screen
2
dfSummary(pheno[, EDcols])
Data Frame Summary
pheno
Dimensions: 502618 x 20
Duplicates: 502591
---------------------------------------------------------------------------------------------------------------
No Variable Stats / Values Freqs (% of Valid) Graph Valid Missing
---- ---------------------- ---------------- -------------------- ----------------------- ---------- ----------
1 AN.count 1. 0 156465 (99.4%) IIIIIIIIIIIIIIIIIII 157402 345216
[factor] 2. 1 937 ( 0.6%) (31.32%) (68.68%)
2 BN.count 1. 0 156855 (99.7%) IIIIIIIIIIIIIIIIIII 157377 345241
[factor] 2. 1 522 ( 0.3%) (31.31%) (68.69%)
3 BED.count 1. 0 156651 (99.6%) IIIIIIIIIIIIIIIIIII 157358 345260
[factor] 2. 1 707 ( 0.4%) (31.31%) (68.69%)
4 EDNOS.count 1. 0 502587 (100.0%) IIIIIIIIIIIIIIIIIII 502618 0
[factor] 2. 1 31 ( 0.0%) (100%) (0%)
5 PUR.count 1. 0 502615 (100.0%) IIIIIIIIIIIIIIIIIII 502618 0
[factor] 2. 1 3 ( 0.0%) (100%) (0%)
6 Pica.count 1. 0 502612 (100.0%) IIIIIIIIIIIIIIIIIII 502618 0
[factor] 2. 1 6 ( 0.0%) (100%) (0%)
7 AN 1. 0 155503 (99.4%) IIIIIIIIIIIIIIIIIII 156440 346178
[factor] 2. 1 937 ( 0.6%) (31.13%) (68.87%)
8 BN 1. 0 155503 (99.7%) IIIIIIIIIIIIIIIIIII 156025 346593
[factor] 2. 1 522 ( 0.3%) (31.04%) (68.96%)
9 BED 1. 0 155503 (99.6%) IIIIIIIIIIIIIIIIIII 156210 346408
[factor] 2. 1 707 ( 0.4%) (31.08%) (68.92%)
10 ANpure 1. 0 155503 (99.5%) IIIIIIIIIIIIIIIIIII 156263 346355
[factor] 2. 1 760 ( 0.5%) (31.09%) (68.91%)
11 BNpure 1. 0 155503 (99.8%) IIIIIIIIIIIIIIIIIII 155820 346798
[factor] 2. 1 317 ( 0.2%) (31%) (69%)
12 BEDpure 1. 0 155503 (99.6%) IIIIIIIIIIIIIIIIIII 156100 346518
[factor] 2. 1 597 ( 0.4%) (31.06%) (68.94%)
13 ED 1. 0 155503 (98.8%) IIIIIIIIIIIIIIIIIII 157434 345184
[factor] 2. 1 1931 ( 1.2%) (31.32%) (68.68%)
14 SRAnorexiaNervosa 1. 0 156467 (99.4%) IIIIIIIIIIIIIIIIIII 157358 345260
[factor] 2. 1 891 ( 0.6%) (31.31%) (68.69%)
15 SRBulimiaNervosa 1. 0 156855 (99.7%) IIIIIIIIIIIIIIIIIII 157358 345260
[factor] 2. 1 503 ( 0.3%) (31.31%) (68.69%)
16 SRBingeEating 1. 0 156651 (99.6%) IIIIIIIIIIIIIIIIIII 157358 345260
[factor] 2. 1 707 ( 0.4%) (31.31%) (68.69%)
17 SREatingDisorderAny 1. 0 155507 (98.8%) IIIIIIIIIIIIIIIIIII 157358 345260
[factor] 2. 1 1851 ( 1.2%) (31.31%) (68.69%)
18 SRANpure 1. 0 156476 (99.4%) IIIIIIIIIIIIIIIIIII 157358 345260
[factor] 2. 1 882 ( 0.6%) (31.31%) (68.69%)
19 SRBNpure 1. 0 156864 (99.7%) IIIIIIIIIIIIIIIIIII 157358 345260
[factor] 2. 1 494 ( 0.3%) (31.31%) (68.69%)
20 SRBEDpure 1. 0 156660 (99.6%) IIIIIIIIIIIIIIIIIII 157358 345260
[factor] 2. 1 698 ( 0.4%) (31.31%) (68.69%)
---------------------------------------------------------------------------------------------------------------
# Numeric count indicators shown in the recoded upset plot
EDcols.recoded <- paste0(
  c("AN", "BN", "BED", "EDNOS", "PUR"),
  ".count.numeric"
)
# Display names in the order the sets are drawn
EDcols.recoded.new <- c(
  "Purging disorder",
  "EDNOS",
  "Binge-eating disorder",
  "Bulimia nervosa",
  "Anorexia nervosa"
)
# Rows with at least one indicator equal to 1, columns renamed for display
EDdataframe.recoded <- pheno %>%
  select(EDcols.recoded) %>%
  filter_at(vars(EDcols.recoded), any_vars(. == 1)) %>%
  rename(
    "Anorexia nervosa" = AN.count.numeric,
    "Bulimia nervosa" = BN.count.numeric,
    "Binge-eating disorder" = BED.count.numeric,
    "EDNOS" = EDNOS.count.numeric,
    "Purging disorder" = PUR.count.numeric
  )
# Remaining missing indicators count as "not present"
EDdataframe.recoded <- replace(
  EDdataframe.recoded, is.na(EDdataframe.recoded), 0
)
colnames(EDdataframe.recoded)
[1] "Anorexia nervosa" "Bulimia nervosa" "Binge-eating disorder"
[4] "EDNOS" "Purging disorder"
# Upset plot of the recoded diagnoses, sets kept in the order given
upset(
  data = EDdataframe.recoded,
  sets = EDcols.recoded.new,
  keep.order = TRUE
)
# Same plot written to a dated PDF
pdf(
  file = paste0("plots/upset_eds_recoded_ukb_",date,".pdf"),
  width = 10, height = 5, onefile = FALSE
)
upset(
  data = EDdataframe.recoded,
  sets = EDcols.recoded.new,
  keep.order = TRUE
)
dev.off()
quartz_off_screen
2
Diagnoses
# Summary table of every column except the first
summarytools::dfSummary(
  pheno[, -1],
  style = "grid",            # grid layout
  plain.ascii = FALSE,       # markdown rather than plain ASCII
  graph.magnif = 0.75,
  tmp.img.dir = "/tmp",
  bootstrap.css = FALSE,     # already part of the theme so no need for it
  dfSummary.silent = TRUE,   # suppresses messages about temporary files
  footnote = NA,             # keeping the results minimalistic
  subtitle.emphasis = FALSE
)
Dimensions: 502618 x 208
Duplicates: 44
No | Variable | Stats / Values | Freqs (% of Valid) | Graph | Valid | Missing |
---|---|---|---|---|---|---|
1 |
Gender |
1. Female |
273405 (54.4%) |
502543 |
75 |
|
2 |
Age |
Mean (sd) : 56.5 (8.1) |
37 distinct values |
502543 |
75 |
|
3 |
Height |
Mean (sd) : 168.4 (9.3) |
557 distinct values |
500004 |
2614 |
|
4 |
Weight |
Mean (sd) : 78 (15.9) |
1361 distinct values |
492414 |
10204 |
|
5 |
BMI |
Mean (sd) : 27.4 (4.8) |
485 distinct values |
492406 |
10212 |
|
6 |
WC |
Mean (sd) : 90.3 (13.5) |
738 distinct values |
500383 |
2235 |
|
7 |
HC |
Mean (sd) : 103.4 (9.2) |
609 distinct values |
500324 |
2294 |
|
8 |
BFPC |
Mean (sd) : 31.5 (8.5) |
583 distinct values |
492134 |
10484 |
|
9 |
FM |
Mean (sd) : 24.9 (9.6) |
856 distinct values |
491568 |
11050 |
|
10 |
FFM |
Mean (sd) : 53.2 (11.5) |
726 distinct values |
492366 |
10252 |
|
11 |
SES |
Mean (sd) : -1.3 (3.1) |
57721 distinct values |
501920 |
698 |
|
12 |
Tobacco_current_orig |
Mean (sd) : 0.1 (0.4) |
-3 : 429 ( 0.1%) |
501652 |
966 |
|
13 |
Alcohol_frequency_orig |
Mean (sd) : 2.9 (1.5) |
-3 : 605 ( 0.1%) |
501646 |
972 |
|
14 |
Centre |
1. 10003 |
3797 ( 0.8%) |
502543 |
75 |
|
15 |
WHR |
Mean (sd) : 0.9 (0.1) |
10728 distinct values |
500278 |
2340 |
|
16 |
Menopause_new |
1. 0 |
229131 (45.6%) |
502536 |
82 |
|
17 |
Pregnancy_no_NA |
1. 0 |
229131 (45.6%) |
502536 |
82 |
|
18 |
Alcohol_frequency |
1. Never |
40649 ( 8.1%) |
501041 |
1577 |
|
19 |
Tobacco_current |
1. No |
448244 (89.4%) |
501223 |
1395 |
|
20 |
WHRadjBMI |
Mean (sd) : 0 (0.1) |
129103 distinct values |
492300 |
10318 |
|
21 |
WCadjBMI |
Mean (sd) : 0 (7.8) |
20620 distinct values |
492329 |
10289 |
|
22 |
HCadjBMI |
Mean (sd) : 0 (4.7) |
16568 distinct values |
492336 |
10282 |
|
23 |
WHRadjBFPC |
Mean (sd) : 0 (0.1) |
203008 distinct values |
492028 |
10590 |
|
24 |
WCadjBFPC |
Mean (sd) : 0 (13) |
32667 distinct values |
492057 |
10561 |
|
25 |
HCadjBFPC |
Mean (sd) : 0 (7.6) |
26087 distinct values |
492064 |
10554 |
|
26 |
FFMadjHeight |
Mean (sd) : 0 (6.8) |
33011 distinct values |
492113 |
10505 |
|
27 |
BFPCadjHeight |
Mean (sd) : 0 (7.4) |
35656 distinct values |
491884 |
10734 |
|
28 |
BFPC_trunk |
Mean (sd) : 31.2 (8) |
650 distinct values |
492112 |
10506 |
|
29 |
FM_trunk |
Mean (sd) : 13.7 (5.2) |
473 distinct values |
492085 |
10533 |
|
30 |
FFM_trunk |
Mean (sd) : 29.6 (6) |
426 distinct values |
492001 |
10617 |
|
31 |
Impedance_wb |
Mean (sd) : 600.1 (89) |
787 distinct values |
492355 |
10263 |
|
32 |
Ethnicity |
1. European |
472161 (94.6%) |
499077 |
3541 |
|
33 |
Region |
1. 1 |
3 ( 0.0%) |
497492 |
5126 |
|
34 |
Income |
1. -3 |
49852 (10.0%) |
496526 |
6092 |
|
35 |
Birth_weight |
Mean (sd) : 3.3 (0.7) |
366 distinct values |
277012 |
225606 |
|
36 |
Menarche_age_at |
Mean (sd) : 12.5 (2.9) |
23 distinct values |
272937 |
229681 |
|
37 |
Contraceptive_ever |
1. -3 |
498 ( 0.2%) |
272934 |
229684 |
|
38 |
HRT |
1. -3 |
297 ( 0.1%) |
272934 |
229684 |
|
39 |
Breastfed |
1. -3 |
462 ( 0.1%) |
501645 |
973 |
|
40 |
VAT |
Mean (sd) : 1217.3 (904.1) |
2394 distinct values |
5109 |
497509 |
|
41 |
BMC_wb |
Mean (sd) : 2636.7 (569.2) |
1982 distinct values |
5170 |
497448 |
|
42 |
BMD_total |
Mean (sd) : 1.2 (0.2) |
721 distinct values |
5170 |
497448 |
|
43 |
BipolarInitialQ |
1. 1 |
808 (50.0%) |
1615 |
501003 |
|
44 |
Age.At.MHQ |
Mean (sd) : 64 (7.7) |
36 distinct values |
157358 |
345260 |
|
45 |
Migrant.Status |
1. 0 |
145880 (92.8%) |
157239 |
345379 |
|
46 |
Highest.Qualification |
1. ALevel |
21077 (13.5%) |
155858 |
346760 |
|
47 |
Smoker |
1. Current |
11339 ( 7.2%) |
157285 |
345333 |
|
48 |
Longstanding.Illness |
1. 0 |
110878 (71.9%) |
154327 |
348291 |
|
49 |
Diabetes |
1. 0 |
151836 (96.7%) |
157071 |
345547 |
|
50 |
Cancer |
1. 0 |
145858 (92.9%) |
156960 |
345658 |
|
51 |
CVD |
1. 0 |
119804 (76.3%) |
157107 |
345511 |
|
52 |
Respiratory |
1. 0 |
103734 (66.0%) |
157179 |
345439 |
|
53 |
Neuroticism |
Mean (sd) : 3.9 (3.2) |
13 distinct values |
131362 |
371256 |
|
54 |
SRSocPhobia |
1. 0 |
155396 (98.8%) |
157358 |
345260 |
|
55 |
SRSchizophrenia |
1. 0 |
157201 (99.9%) |
157358 |
345260 |
|
56 |
SRPsychosisOther |
1. 0 |
156754 (99.6%) |
157358 |
345260 |
|
57 |
SRPsychosisAny |
1. 0 |
156635 (99.5%) |
157358 |
345260 |
|
58 |
SRPersonalityDisorder |
1. 0 |
156973 (99.8%) |
157358 |
345260 |
|
59 |
SROtherPhobia |
1. 0 |
155205 (98.6%) |
157358 |
345260 |
|
60 |
SRPanicAttacks |
1. 0 |
148654 (94.5%) |
157358 |
345260 |
|
61 |
SROCD |
1. 0 |
156376 (99.4%) |
157358 |
345260 |
|
62 |
SRManiaBIP |
1. 0 |
156521 (99.5%) |
157358 |
345260 |
|
63 |
SRDepression |
1. 0 |
123936 (78.8%) |
157358 |
345260 |
|
64 |
SRMood |
1. 0 |
123622 (78.6%) |
157358 |
345260 |
|
65 |
SRBulimiaNervosa |
1. 0 |
156855 (99.7%) |
157358 |
345260 |
|
66 |
SRBingeEating |
1. 0 |
156651 (99.6%) |
157358 |
345260 |
|
67 |
SRASD |
1. 0 |
157135 (99.9%) |
157358 |
345260 |
|
68 |
SRGADandOthers |
1. 0 |
135323 (86.0%) |
157358 |
345260 |
|
69 |
SRAnorexiaNervosa |
1. 0 |
156467 (99.4%) |
157358 |
345260 |
|
70 |
SREatingDisorderAny |
1. 0 |
155507 (98.8%) |
157358 |
345260 |
|
71 |
SRAgoraphobia |
1. 0 |
156759 (99.6%) |
157358 |
345260 |
|
72 |
SRAnxietyAny |
1. 0 |
129412 (82.2%) |
157358 |
345260 |
|
73 |
SRADHD |
1. 0 |
157225 (99.9%) |
157358 |
345260 |
|
74 |
SRPNTA |
Min : 0 |
0 : 156825 (99.7%) |
157358 |
345260 |
|
75 |
SmithDepression |
1. 0 |
30883 (72.9%) |
42374 |
460244 |
|
76 |
PHQ9.No.Info |
1. 0 |
157136 (99.9%) |
157358 |
345260 |
|
77 |
PHQ9.Screen |
1. 0 |
116288 (73.9%) |
157358 |
345260 |
|
78 |
PHQ9.Items |
1. 0 |
119203 (75.8%) |
157358 |
345260 |
|
79 |
PHQ9.Severity |
Mean (sd) : 2.8 (3.7) |
28 distinct values |
157358 |
345260 |
|
80 |
Depressed.Ever |
1. 0 |
88647 (70.3%) |
126077 |
376541 |
|
81 |
Depressed.Ever.Severe |
1. 0 |
120125 (95.3%) |
126077 |
376541 |
|
82 |
Recurrent.Depression |
1. 0 |
42231 (66.6%) |
63416 |
439202 |
|
83 |
Single.Depression |
1. 0 |
48735 (76.8%) |
63416 |
439202 |
|
84 |
SmithBipolar |
1. 0 |
41874 (98.8%) |
42374 |
460244 |
|
85 |
GAD7.Severity |
Mean (sd) : 2.2 (3.4) |
22 distinct values |
157264 |
345354 |
|
86 |
GAD.Ever |
1. 0 |
96793 (89.7%) |
107903 |
394715 |
|
87 |
GAD.Current |
1. 0 |
105222 (97.5%) |
107901 |
394717 |
|
88 |
AUDIT.Score |
Mean (sd) : 4.9 (4.2) |
41 distinct values |
157358 |
345260 |
|
89 |
Alcohol.Use.Disorder |
1. 0 |
63455 (66.1%) |
96056 |
406562 |
|
90 |
Cannabis.Ever |
1. 0 |
122473 (77.9%) |
157129 |
345489 |
|
91 |
Cannabis.Daily |
1. 0 |
32572 (93.4%) |
34885 |
467733 |
|
92 |
Addiction.Ever.SelfReport |
1. 0 |
146216 (94.0%) |
155600 |
347018 |
|
93 |
Addiction.Ever |
1. 0 |
142276 (93.8%) |
151660 |
350958 |
|
94 |
Substance.Addiction.Ever |
1. 0 |
4378 (46.7%) |
9380 |
493238 |
|
95 |
Alcohol.Dependence.Ever |
1. 0 |
2489 (72.5%) |
3435 |
499183 |
|
96 |
Addiction.Current |
1. 0 |
3535 (54.2%) |
6525 |
496093 |
|
97 |
Trauma.Childhood |
1. 0 |
82402 (53.6%) |
153643 |
348975 |
|
98 |
Trauma.Adult |
1. 0 |
69977 (46.4%) |
150932 |
351686 |
|
99 |
Trauma.Catastrophic |
1. 0 |
77538 (49.3%) |
157326 |
345292 |
|
100 |
PTSD |
1. 0 |
146649 (93.6%) |
156712 |
345906 |
|
101 |
Self.Harm.Ever |
1. 0 |
150011 (95.6%) |
156883 |
345735 |
|
102 |
Not.Worth.Living |
Min : 0 |
0 : 108757 (69.1%) |
157358 |
345260 |
|
103 |
Self.Harm.Suicide.Attempt |
1. 0 |
3201 (48.3%) |
6627 |
495991 |
|
104 |
WellbeingScore |
Mean (sd) : 12.7 (2) |
15 distinct values |
152694 |
349924 |
|
105 |
NoSRConditions |
1. 0 |
54017 (34.3%) |
157358 |
345260 |
|
106 |
MHQ |
1. 1 |
157358 (100.0%) |
157358 |
345260 |
|
107 |
SRAnyMDX |
1. 0 |
107259 (68.2%) |
157358 |
345260 |
|
108 |
SRANpure |
1. 0 |
156476 (99.4%) |
157358 |
345260 |
|
109 |
SRBNpure |
1. 0 |
156864 (99.7%) |
157358 |
345260 |
|
110 |
SRBEDpure |
1. 0 |
156660 (99.6%) |
157358 |
345260 |
|
111 |
Corticoids |
1. 0 |
490339 (97.6%) |
502618 |
0 |
|
112 |
Antidiabetics |
1. 0 |
483581 (96.2%) |
502618 |
0 |
|
113 |
Diuretics |
1. 0 |
462505 (92.0%) |
502618 |
0 |
|
114 |
Gonadotropins |
1. 0 |
502081 (99.9%) |
502618 |
0 |
|
115 |
Growth_Hormone |
1. 0 |
502446 (100.0%) |
502618 |
0 |
|
116 |
AntiretroviralMed |
1. 0 |
502195 (99.9%) |
502618 |
0 |
|
117 |
HRT_Contraceptives |
1. 0 |
478495 (95.2%) |
502618 |
0 |
|
118 |
Antiosteoporotics |
1. 0 |
493957 (98.3%) |
502618 |
0 |
|
119 |
Testosterone |
1. 0 |
502030 (99.9%) |
502618 |
0 |
|
120 |
Thyreostatics |
1. 0 |
474247 (94.4%) |
502618 |
0 |
|
121 |
AntitubercularAntileproticMed |
1. 0 |
502511 (100.0%) |
502618 |
0 |
|
122 |
Antidepressants |
1. 0 |
465830 (92.7%) |
502618 |
0 |
|
123 |
Antineoplastics |
1. 0 |
495299 (98.5%) |
502618 |
0 |
|
124 |
Antipsychotics |
1. 0 |
499662 (99.4%) |
502618 |
0 |
|
125 |
Anxiolytics |
1. 0 |
496967 (98.9%) |
502618 |
0 |
|
126 |
MetabolicMed |
1. 0 |
360326 (71.7%) |
502618 |
0 |
|
127 |
PsychotropicMed |
1. 0 |
450195 (89.6%) |
502618 |
0 |
|
128 |
DxCancerAny |
1. 0 |
429640 (85.5%) |
502618 |
0 |
|
129 |
DxBodyCompAny |
1. 0 |
330751 (65.8%) |
502618 |
0 |
|
130 |
CancerRegister |
1. 0 |
437661 (87.1%) |
502618 |
0 |
|
131 |
DxConnectiveTissue |
1. 0 |
498946 (99.3%) |
502618 |
0 |
|
132 |
DxDiabetes |
1. 0 |
476605 (94.8%) |
502618 |
0 |
|
133 |
DxEndocrine |
1. 0 |
499401 (99.4%) |
502618 |
0 |
|
134 |
DxGlucose |
1. 0 |
501409 (99.8%) |
502618 |
0 |
|
135 |
DxHIV |
1. 0 |
502407 (100.0%) |
502618 |
0 |
|
136 |
DxIBD |
1. 0 |
479133 (95.3%) |
502618 |
0 |
|
137 |
DxIBS |
1. 0 |
495941 (98.7%) |
502618 |
0 |
|
138 |
DxLiver |
1. 0 |
496400 (98.8%) |
502618 |
0 |
|
139 |
DxPsy |
1. 0 |
463307 (92.2%) |
502618 |
0 |
|
140 |
DxMetabolic |
1. 0 |
447982 (89.1%) |
502618 |
0 |
|
141 |
DxMuscles |
1. 0 |
501215 (99.7%) |
502618 |
0 |
|
142 |
DxPancreatitis |
1. 0 |
501977 (99.9%) |
502618 |
0 |
|
143 |
DxThyroid |
1. 0 |
481756 (95.9%) |
502618 |
0 |
|
144 |
DxTuberculosis |
1. 0 |
502333 (99.9%) |
502618 |
0 |
|
145 |
F500 |
1. 0 |
502559 (100.0%) |
502618 |
0 |
|
146 |
F501 |
1. 0 |
502615 (100.0%) |
502618 |
0 |
|
147 |
F502 |
1. 0 |
502592 (100.0%) |
502618 |
0 |
|
148 |
F505 |
1. 0 |
502615 (100.0%) |
502618 |
0 |
|
149 |
F508 |
1. 0 |
502612 (100.0%) |
502618 |
0 |
|
150 |
F509 |
1. 0 |
502587 (100.0%) |
502618 |
0 |
|
151 |
SRAnyIllnessBC |
1. 0 |
374256 (74.5%) |
502618 |
0 |
|
152 |
SRAnyIllnessBC.0 |
1. 0 |
378244 (75.2%) |
502618 |
0 |
|
153 |
SRAnyIllnessBC.1 |
1. 0 |
498211 (99.1%) |
502618 |
0 |
|
154 |
SRAnyIllnessBC.2 |
1. 0 |
498903 (99.3%) |
502618 |
0 |
|
155 |
SRAnyPsyIllness |
1. 0 |
464923 (92.5%) |
502618 |
0 |
|
156 |
SRAnyNeuroIllness |
1. 0 |
489801 (97.5%) |
502618 |
0 |
|
157 |
SRAnyCancerBC |
1. 0 |
461639 (91.8%) |
502618 |
0 |
|
158 |
SRAnyCancer |
1. 0 |
459285 (91.4%) |
502618 |
0 |
|
159 |
SRAnyCancerNeuro |
1. 0 |
502351 (100.0%) |
502618 |
0 |
|
160 |
SRHeartAttack |
1. 1 |
11849 (100.0%) |
11849 |
490769 |
|
161 |
SRAngina |
1. 1 |
16490 (100.0%) |
16490 |
486128 |
|
162 |
SRStroke |
1. 1 |
7901 (100.0%) |
7901 |
494717 |
|
163 |
SRHighBloodPressure |
1. 1 |
138100 (100.0%) |
138100 |
364518 |
|
164 |
ICD.autoimmune |
Min : 0 |
0 : 460599 (91.6%) |
502618 |
0 |
|
165 |
ICD.autoinflammatory |
Min : 0 |
0 : 352521 (70.1%) |
502618 |
0 |
|
166 |
ICD.immunodeficiency |
Min : 0 |
0 : 502049 (99.9%) |
502618 |
0 |
|
167 |
ICD.memory |
Min : 0 |
0 : 497862 (99.1%) |
502618 |
0 |
|
168 |
ICD.metabolic |
Min : 0 |
0 : 418253 (83.2%) |
502618 |
0 |
|
169 |
ICD.psychiatric |
Min : 0 |
0 : 463307 (92.2%) |
502618 |
0 |
|
170 |
ICD.immunodysregulation |
Min : 0 |
0 : 345449 (68.7%) |
502618 |
0 |
|
171 |
SRAnorexiaNervosa.numeric |
Min : 0 |
0 : 156467 (99.4%) |
157358 |
345260 |
|
172 |
F500.numeric |
Min : 0 |
0 : 502559 (100.0%) |
502618 |
0 |
|
173 |
F501.numeric |
Min : 0 |
0 : 502615 (100.0%) |
502618 |
0 |
|
174 |
F502.numeric |
Min : 0 |
0 : 502592 (100.0%) |
502618 |
0 |
|
175 |
F505.numeric |
Min : 0 |
0 : 502615 (100.0%) |
502618 |
0 |
|
176 |
F508.numeric |
Min : 0 |
0 : 502612 (100.0%) |
502618 |
0 |
|
177 |
F509.numeric |
Min : 0 |
0 : 502587 (100.0%) |
502618 |
0 |
|
178 |
SRBulimiaNervosa.numeric |
Min : 0 |
0 : 156855 (99.7%) |
157358 |
345260 |
|
179 |
SRBingeEating.numeric |
Min : 0 |
0 : 156651 (99.6%) |
157358 |
345260 |
|
180 |
AN.count |
1. 0 |
156465 (99.4%) |
157402 |
345216 |
|
181 |
BN.count |
1. 0 |
156855 (99.7%) |
157377 |
345241 |
|
182 |
BED.count |
1. 0 |
156651 (99.6%) |
157358 |
345260 |
|
183 |
EDNOS.count |
1. 0 |
502587 (100.0%) |
502618 |
0 |
|
184 |
PUR.count |
1. 0 |
502615 (100.0%) |
502618 |
0 |
|
185 |
Pica.count |
1. 0 |
502612 (100.0%) |
502618 |
0 |
|
186 |
AN.count.numeric |
Min : 0 |
0 : 156465 (99.4%) |
157402 |
345216 |
|
187 |
BN.count.numeric |
Min : 0 |
0 : 156855 (99.7%) |
157377 |
345241 |
|
188 |
BED.count.numeric |
Min : 0 |
0 : 156651 (99.6%) |
157358 |
345260 |
|
189 |
EDNOS.count.numeric |
Min : 0 |
0 : 502587 (100.0%) |
502618 |
0 |
|
190 |
PUR.count.numeric |
Min : 0 |
0 : 502615 (100.0%) |
502618 |
0 |
|
191 |
Pica.count.numeric |
Min : 0 |
0 : 502612 (100.0%) |
502618 |
0 |
|
192 |
AN |
1. 0 |
155503 (99.4%) |
156440 |
346178 |
|
193 |
AN.numeric |
Min : 0 |
0 : 155503 (99.4%) |
156440 |
346178 |
|
194 |
BN |
1. 0 |
155503 (99.7%) |
156025 |
346593 |
|
195 |
BN.numeric |
Min : 0 |
0 : 155503 (99.7%) |
156025 |
346593 |
|
196 |
BED |
1. 0 |
155503 (99.6%) |
156210 |
346408 |
|
197 |
BED.numeric |
Min : 0 |
0 : 155503 (99.6%) |
156210 |
346408 |
|
198 |
ANpure.count |
Min : 0 |
0 : 156680 (99.5%) |
157440 |
345178 |
|
199 |
ANpure |
1. 0 |
155503 (99.5%) |
156263 |
346355 |
|
200 |
ANpure.numeric |
Min : 0 |
0 : 155503 (99.5%) |
156263 |
346355 |
|
201 |
BNpure.count |
Min : 0 |
0 : 157123 (99.8%) |
157440 |
345178 |
|
202 |
BNpure |
1. 0 |
155503 (99.8%) |
155820 |
346798 |
|
203 |
BNpure.numeric |
Min : 0 |
0 : 155503 (99.8%) |
155820 |
346798 |
|
204 |
BEDpure.count |
Min : 0 |
0 : 156843 (99.6%) |
157440 |
345178 |
|
205 |
BEDpure |
1. 0 |
155503 (99.6%) |
156100 |
346518 |
|
206 |
BEDpure.numeric |
Min : 0 |
0 : 155503 (99.6%) |
156100 |
346518 |
|
207 |
ED |
1. 0 |
155503 (98.8%) |
157434 |
345184 |
|
208 |
ED.control |
Min : 0 |
0 : 155506 (98.8%) |
157418 |
345200 |
# Read the principal-component file and keep it as a plain data.frame
# (data.table = FALSE) so that base-R indexing is used downstream.
pcs <- fread(
  file = "data_raw/2019_02_29_BC/pcs_pcas.txt",
  header = TRUE,
  data.table = FALSE
)
# Rows x columns of the imported table
dim(pcs)
[1] 385753 17
# Keep the participant ID plus the first 10 principal components.
# Columns are selected by name instead of by the positional range 2:12, so a
# change in the column order of the input file cannot silently select the
# wrong columns (a missing name raises an error instead).
pcs_reduced <- pcs[, c("IID", paste0("PC", 1:10))]
dim(pcs_reduced)
[1] 385753 11
colnames(pcs_reduced)
[1] "IID" "PC1" "PC2" "PC3" "PC4" "PC5" "PC6" "PC7" "PC8" "PC9"
[11] "PC10"
Batch and array type
# Read the batch / array file; it is read without a header row.
batch_array <- fread(
  file = "data_raw/2019_02_29_BC/2754_Batch_Array.txt",
  header = FALSE,
  data.table = FALSE
)
# Name the three columns
names(batch_array) <- c("IID", "Array", "Batch")
# Array and Batch are categorical: convert both columns to factors
batch_array[c("Array", "Batch")] <- lapply(
  batch_array[c("Array", "Batch")],
  as.factor
)
dim(batch_array)
[1] 488377 3
MAF0.01_GENO0.02_MIND0.02_CAUC1_UKBQC1_UNREL0.044_HWE0.00000001_SEX1
# Read the GID list as a plain data.frame.
GID <- fread(
  file = "data_raw/2019_02_29_BC/GID_list.txt",
  header = TRUE,
  data.table = FALSE
)
# Label the Passed_GQC codes: 0 stays "0", 1 becomes "Male", 2 becomes "Female"
GID[["Passed_GQC"]] <- factor(
  GID[["Passed_GQC"]],
  levels = c(0, 1, 2),
  labels = c("0", "Male", "Female")
)
# Counts per level
summary(GID[["Passed_GQC"]])
0 Male Female
10 177555 208188
# Add a constant indicator column: every ID in this list gets EuropeanGenetic = 1
GID[["EuropeanGenetic"]] <- 1
# Print the Passed_GQC counts again
summary(GID[["Passed_GQC"]])
0 Male Female
10 177555 208188
str(GID)
'data.frame': 385753 obs. of 3 variables:
$ IID : int 1993198 4439466 3643257 2033164 1890169 2373695 5873836 5835698 4946986 3939224 ...
$ Passed_GQC : Factor w/ 3 levels "0","Male","Female": 2 3 3 3 3 3 3 3 3 3 ...
$ EuropeanGenetic: num 1 1 1 1 1 1 1 1 1 1 ...
dim(GID)
[1] 385753 3
dim(pheno)
[1] 502618 209
# Inner join (all = FALSE): keep only participants present in both the genetic
# ID list and the phenotype table. The join key is named explicitly so the
# merge does not depend on whichever column names the two tables happen to
# share; sort = FALSE leaves the rows unsorted on the key.
GID_pheno <- merge(GID, pheno, by = "IID", all = FALSE, sort = FALSE)
dim(GID_pheno)
[1] 385743 211
Merge genetic IDs and UKB phenotypes with MHQ, medication (meds), and ICD diagnoses (dxICD)
# Full outer join (all = TRUE) of the principal components with batch/array
# information. The key is given explicitly instead of relying on merge()'s
# default of "all shared column names".
pcs_batch_array <- merge(pcs_reduced, batch_array,
  by = "IID", all = TRUE, sort = FALSE
)
dim(pcs_batch_array)
[1] 488377 13
# Left join (all.x = TRUE): every row of GID_pheno is kept; PC, Array and Batch
# columns are NA for IDs without a match in pcs_batch_array. The key is named
# explicitly rather than inferred from shared column names.
GID_pheno_pcs_batch_array <- merge(GID_pheno, pcs_batch_array,
  by = "IID", all.x = TRUE, sort = FALSE
)
# Shorter working name for the merged table
GIDpheno <- GID_pheno_pcs_batch_array
dim(GIDpheno)
[1] 385743 223
dim(GID)
[1] 385753 3
dim(pheno)
[1] 502618 209
# Full outer join (all = TRUE): keep every participant from either table,
# with NA where one side has no matching IID. Key given explicitly.
GID_pheno_all <- merge(GID, pheno, by = "IID", all = TRUE, sort = FALSE)
dim(GID_pheno_all)
[1] 502628 211
# Full outer join of the all-participant table with the PC / batch / array
# table, again keyed explicitly on IID.
GID_pheno_all_genetic <- merge(GID_pheno_all, pcs_batch_array,
  by = "IID", all = TRUE, sort = FALSE
)
dim(GID_pheno_all_genetic)
[1] 502630 223
Extract complete cases
# Keep only the rows that have no missing value in any column named in cols_cc
GIDpheno_cc <- GIDpheno[stats::complete.cases(GIDpheno[, cols_cc]), ]
# Row-count difference, computed as (complete rows) - (all rows): the result
# is zero or negative, i.e. the number of dropped rows carries a minus sign.
cases_incomplete <- NROW(GIDpheno_cc) - NROW(GIDpheno)
cases_incomplete
[1] -8310
dim(GIDpheno_cc)
[1] 377433 223
sum(is.na(GIDpheno_cc[,cols_cc]))
[1] 0
summary(GIDpheno_cc$Gender)
Female Male
204244 173189
Female and male
# Female participants only (rows with a missing Gender are not selected)
GIDpheno_cc_female <- GIDpheno_cc[which(GIDpheno_cc$Gender == "Female"), ]
dim(GIDpheno_cc_female)
[1] 204244 223
# 208188 325
# Male participants only (rows with a missing Gender are not selected)
GIDpheno_cc_male <- GIDpheno_cc[which(GIDpheno_cc$Gender == "Male"), ]
dim(GIDpheno_cc_male)
[1] 173189 223
# 177554 325
Write phenotype file: European participants with complete body composition data and genetic information (participants with a hysterectomy are still included at this stage)
# Write the complete-case table as a tab-separated, quoted text file with a
# header row and no row names.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned; paste0() replaces paste(..., sep = "").
# NOTE(review): `date` is assumed to be a character suffix defined earlier in
# the script - confirm.
write.table(GIDpheno_cc,
  file = paste0("data/BCpheno", date, ".txt"),
  quote = TRUE,
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE)
Recode menopause to binary. Every female with a hysterectomy is recoded to NA.
# Collapse the Menopause_new coding into a binary indicator via a lookup:
#   0 (male & NA)                      -> 0
#   1 (female & NA)                    -> NA
#   2 (female & yes)                   -> 1
#   3 (female & hysterectomy)          -> NA
#   4 (female & not sure)              -> 0
#   5 (female & prefer not to answer)  -> NA
#   6 (female & no)                    -> 0
# Codes that are not in the lookup (1, 3, 5) and rows with a missing
# Menopause_new come out as NA.
GIDpheno_cc[["Menopause_bin"]] <- unname(
  c("0" = 0, "2" = 1, "4" = 0, "6" = 0)[
    as.character(GIDpheno_cc[["Menopause_new"]])
  ]
)
# Store the indicator as a factor and show the counts per level
GIDpheno_cc[["Menopause_bin"]] <- as.factor(GIDpheno_cc[["Menopause_bin"]])
summary(GIDpheno_cc[["Menopause_bin"]])
0 1 NA's
228955 124974 23504
# OLD
# 0 1 NA's
# 234273 127330 24139
Exclude hysterectomy and pregnancy. Menopause_new coding: 0) male & NA; 1) female & NA; 2) female & yes; 3) female & hysterectomy; 4) female & not sure; 5) female & prefer not to answer; 6) female & no
# Keep Menopause_new codes 0, 2, 4 and 6; codes 1, 3 (hysterectomy) and 5 as
# well as rows with a missing Menopause_new are dropped.
GIDpheno_cc_menopause <- subset(
  GIDpheno_cc,
  Menopause_new %in% c(0, 2, 4, 6)
)
dim(GIDpheno_cc_menopause)
[1] 353929 224
# 361603 325
# Menopause prior to subsetting
summary(GIDpheno_cc$Menopause_new)
0 1 2 3 4 5 6
173189 1 124974 23334 8747 169 47019
#0 1 2 3 4 5 6 NA's
#177552 128 127330 23826 8896 183 47825 2
# Menopause after subsetting
summary(GIDpheno_cc_menopause$Menopause_new)
0 1 2 3 4 5 6
173189 0 124974 0 8747 0 47019
# 0 1 2 3 4 5 6
# 177552 0 127330 0 8896 0 47825
Exclude pregnant women. Pregnancy_no_NA coding: 0) male & NA; 1) female & NA; 2) female & no; 3) female & yes; 4) female & unsure
# Keep Pregnancy_no_NA codes 0 and 2 only; every other code and rows with a
# missing Pregnancy_no_NA are dropped.
GIDpheno_cc_menopause_pregnancy <- subset(
  GIDpheno_cc_menopause,
  Pregnancy_no_NA %in% c(0, 2)
)
### Prior to subsetting of hysterectomy and menopause
summary(GIDpheno_cc[["Pregnancy_no_NA"]])
0 1 2 3 4
173189 0 204244 0 0
#0 1 2 3 4 NA's
#177552 209 207720 105 154 2
### Prior to subsetting of pregnancy
summary(GIDpheno_cc_menopause$Pregnancy_no_NA)
0 1 2 3 4
173189 0 180740 0 0
# 0 1 2 3 4
#177552 174 183623 104 150
### After subsetting of pregnancy
summary(GIDpheno_cc_menopause_pregnancy$Pregnancy_no_NA)
0 1 2 3 4
173189 0 180740 0 0
#0 1 2 3 4
#177552 0 183623 0 0
# Double check: there should be no NAs anymore in the binary Menopause variable
summary(GIDpheno_cc_menopause_pregnancy$Menopause_bin)
0 1
228955 124974
# 0 1
# 233987 127188
dim(GIDpheno_cc_menopause_pregnancy)
[1] 353929 224
# 361175 321
Without hysterectomy and pregnancy; including diagnoses, medication, self-report, and genetic variables
OLD:353972 321
Move data frame
# Give the fully filtered table (complete cases, menopause and pregnancy
# subsets applied) the shorter name used from here on.
complete <- GIDpheno_cc_menopause_pregnancy
Create female and male subset
# Female participants of the complete data set
complete_female <- complete[which(complete$Gender == "Female"), ]
dim(complete_female)
[1] 180740 224
# 180765 321
# Male participants of the complete data set
complete_male <- complete[which(complete$Gender == "Male"), ]
dim(complete_male)
[1] 173189 224
# 173207 321
Descriptives: Complete (300K)
# Per-variable descriptive table for `complete`, leaving out its first column.
summarytools::dfSummary(
  complete[, -1],
  bootstrap.css = FALSE,      # not needed: the document theme already has it
  plain.ascii = FALSE,        # markdown-style output rather than plain ASCII
  style = "grid",             # grid table layout
  dfSummary.silent = TRUE,    # no messages about temporary files
  footnote = NA,              # no footnote, to keep the output minimal
  subtitle.emphasis = FALSE,
  graph.magnif = 0.75,
  tmp.img.dir = "/tmp"
)
Dimensions: 353929 x 223
Duplicates: 0
No | Variable | Stats / Values | Freqs (% of Valid) | Graph | Valid | Missing |
---|---|---|---|---|---|---|
1 |
Passed_GQC |
1. 0 |
0 ( 0.0%) |
353929 |
0 |
|
2 |
EuropeanGenetic |
1 distinct value |
1 : 353929 (100.0%) |
353929 |
0 |
|
3 |
Gender |
1. Female |
180740 (51.1%) |
353929 |
0 |
|
4 |
Age |
Mean (sd) : 56.6 (8.1) |
36 distinct values |
353929 |
0 |
|
5 |
Height |
Mean (sd) : 169.2 (9.2) |
535 distinct values |
353929 |
0 |
|
6 |
Weight |
Mean (sd) : 78.5 (15.9) |
1288 distinct values |
353929 |
0 |
|
7 |
BMI |
Mean (sd) : 27.3 (4.7) |
462 distinct values |
353929 |
0 |
|
8 |
WC |
Mean (sd) : 90.4 (13.5) |
704 distinct values |
353929 |
0 |
|
9 |
HC |
Mean (sd) : 103.3 (9) |
558 distinct values |
353929 |
0 |
|
10 |
BFPC |
Mean (sd) : 30.9 (8.4) |
556 distinct values |
353929 |
0 |
|
11 |
FM |
Mean (sd) : 24.5 (9.4) |
827 distinct values |
353929 |
0 |
|
12 |
FFM |
Mean (sd) : 54 (11.6) |
706 distinct values |
353929 |
0 |
|
13 |
SES |
Mean (sd) : -1.5 (3) |
52279 distinct values |
353929 |
0 |
|
14 |
Tobacco_current_orig |
Mean (sd) : 0.1 (0.4) |
-3 : 191 ( 0.1%) |
353929 |
0 |
|
15 |
Alcohol_frequency_orig |
Mean (sd) : 2.8 (1.5) |
-3 : 261 ( 0.1%) |
353929 |
0 |
|
16 |
Centre |
1. 10003 |
317 ( 0.1%) |
353929 |
0 |
|
17 |
WHR |
Mean (sd) : 0.9 (0.1) |
8666 distinct values |
353929 |
0 |
|
18 |
Menopause_new |
1. 0 |
173189 (48.9%) |
353929 |
0 |
|
19 |
Pregnancy_no_NA |
1. 0 |
173189 (48.9%) |
353929 |
0 |
|
20 |
Alcohol_frequency |
1. Never |
22330 ( 6.3%) |
353668 |
261 |
|
21 |
Tobacco_current |
1. No |
316964 (89.6%) |
353738 |
191 |
|
22 |
WHRadjBMI |
Mean (sd) : 0 (0.1) |
108094 distinct values |
353929 |
0 |
|
23 |
WCadjBMI |
Mean (sd) : 0.4 (7.8) |
17817 distinct values |
353929 |
0 |
|
24 |
HCadjBMI |
Mean (sd) : 0.1 (4.6) |
14219 distinct values |
353929 |
0 |
|
25 |
WHRadjBFPC |
Mean (sd) : 0 (0.1) |
167707 distinct values |
353929 |
0 |
|
26 |
WCadjBFPC |
Mean (sd) : 0.4 (13.1) |
29019 distinct values |
353929 |
0 |
|
27 |
HCadjBFPC |
Mean (sd) : 0.2 (7.5) |
22685 distinct values |
353929 |
0 |
|
28 |
FFMadjHeight |
Mean (sd) : 0 (6.8) |
28768 distinct values |
353929 |
0 |
|
29 |
BFPCadjHeight |
Mean (sd) : -0.2 (7.3) |
30583 distinct values |
353929 |
0 |
|
30 |
BFPC_trunk |
Mean (sd) : 30.8 (7.9) |
629 distinct values |
353727 |
202 |
|
31 |
FM_trunk |
Mean (sd) : 13.7 (5.1) |
454 distinct values |
353705 |
224 |
|
32 |
FFM_trunk |
Mean (sd) : 30 (6) |
409 distinct values |
353639 |
290 |
|
33 |
Impedance_wb |
Mean (sd) : 595.9 (88.4) |
746 distinct values |
353898 |
31 |
|
34 |
Ethnicity |
1. European |
351224 (99.6%) |
352499 |
1430 |
|
35 |
Region |
1. 1 |
3 ( 0.0%) |
350549 |
3380 |
|
36 |
Income |
1. -3 |
33223 ( 9.4%) |
352806 |
1123 |
|
37 |
Birth_weight |
Mean (sd) : 3.3 (0.7) |
344 distinct values |
199813 |
154116 |
|
38 |
Menarche_age_at |
Mean (sd) : 12.6 (2.8) |
23 distinct values |
180740 |
173189 |
|
39 |
Contraceptive_ever |
1. -3 |
146 ( 0.1%) |
180740 |
173189 |
|
40 |
HRT |
1. -3 |
55 ( 0.0%) |
180740 |
173189 |
|
41 |
Breastfed |
1. -3 |
130 ( 0.0%) |
353929 |
0 |
|
42 |
VAT |
Mean (sd) : 1243.1 (928.5) |
2105 distinct values |
3801 |
350128 |
|
43 |
BMC_wb |
Mean (sd) : 2663.7 (573.5) |
1801 distinct values |
3852 |
350077 |
|
44 |
BMD_total |
Mean (sd) : 1.2 (0.2) |
689 distinct values |
3852 |
350077 |
|
45 |
BipolarInitialQ |
1. 1 |
557 (52.6%) |
1059 |
352870 |
|
46 |
Age.At.MHQ |
Mean (sd) : 64 (7.7) |
35 distinct values |
116985 |
236944 |
|
47 |
Migrant.Status |
1. 0 |
109977 (94.0%) |
116965 |
236964 |
|
48 |
Highest.Qualification |
1. ALevel |
16004 (13.8%) |
116426 |
237503 |
|
49 |
Smoker |
1. Current |
8421 ( 7.2%) |
116985 |
236944 |
|
50 |
Longstanding.Illness |
1. 0 |
83517 (72.7%) |
114873 |
239056 |
|
51 |
Diabetes |
1. 0 |
113142 (96.8%) |
116850 |
237079 |
|
52 |
Cancer |
1. 0 |
108882 (93.2%) |
116783 |
237146 |
|
53 |
CVD |
1. 0 |
89806 (76.8%) |
116869 |
237060 |
|
54 |
Respiratory |
1. 0 |
77576 (66.3%) |
116916 |
237013 |
|
55 |
Neuroticism |
Mean (sd) : 3.8 (3.2) |
13 distinct values |
98179 |
255750 |
|
56 |
SRSocPhobia |
1. 0 |
115562 (98.8%) |
116985 |
236944 |
|
57 |
SRSchizophrenia |
1. 0 |
116879 (99.9%) |
116985 |
236944 |
|
58 |
SRPsychosisOther |
1. 0 |
116542 (99.6%) |
116985 |
236944 |
|
59 |
SRPsychosisAny |
1. 0 |
116460 (99.6%) |
116985 |
236944 |
|
60 |
SRPersonalityDisorder |
1. 0 |
116712 (99.8%) |
116985 |
236944 |
|
61 |
SROtherPhobia |
1. 0 |
115482 (98.7%) |
116985 |
236944 |
|
62 |
SRPanicAttacks |
1. 0 |
110759 (94.7%) |
116985 |
236944 |
|
63 |
SROCD |
1. 0 |
116285 (99.4%) |
116985 |
236944 |
|
64 |
SRManiaBIP |
1. 0 |
116388 (99.5%) |
116985 |
236944 |
|
65 |
SRDepression |
1. 0 |
92762 (79.3%) |
116985 |
236944 |
|
66 |
SRMood |
1. 0 |
92528 (79.1%) |
116985 |
236944 |
|
67 |
SRBulimiaNervosa |
1. 0 |
116622 (99.7%) |
116985 |
236944 |
|
68 |
SRBingeEating |
1. 0 |
116505 (99.6%) |
116985 |
236944 |
|
69 |
SRASD |
1. 0 |
116812 (99.9%) |
116985 |
236944 |
|
70 |
SRGADandOthers |
1. 0 |
100836 (86.2%) |
116985 |
236944 |
|
71 |
SRAnorexiaNervosa |
1. 0 |
116334 (99.4%) |
116985 |
236944 |
|
72 |
SREatingDisorderAny |
1. 0 |
115682 (98.9%) |
116985 |
236944 |
|
73 |
SRAgoraphobia |
1. 0 |
116538 (99.6%) |
116985 |
236944 |
|
74 |
SRAnxietyAny |
1. 0 |
96648 (82.6%) |
116985 |
236944 |
|
75 |
SRADHD |
1. 0 |
116896 (99.9%) |
116985 |
236944 |
|
76 |
SRPNTA |
Min : 0 |
0 : 116600 (99.7%) |
116985 |
236944 |
|
77 |
SmithDepression |
1. 0 |
22858 (73.3%) |
31182 |
322747 |
|
78 |
PHQ9.No.Info |
1. 0 |
116840 (99.9%) |
116985 |
236944 |
|
79 |
PHQ9.Screen |
1. 0 |
87006 (74.4%) |
116985 |
236944 |
|
80 |
PHQ9.Items |
1. 0 |
89603 (76.6%) |
116985 |
236944 |
|
81 |
PHQ9.Severity |
Mean (sd) : 2.7 (3.7) |
28 distinct values |
116985 |
236944 |
|
82 |
Depressed.Ever |
1. 0 |
66802 (71.1%) |
94000 |
259929 |
|
83 |
Depressed.Ever.Severe |
1. 0 |
89812 (95.5%) |
94000 |
259929 |
|
84 |
Recurrent.Depression |
1. 0 |
31416 (67.2%) |
46745 |
307184 |
|
85 |
Single.Depression |
1. 0 |
35993 (77.0%) |
46745 |
307184 |
|
86 |
SmithBipolar |
1. 0 |
30834 (98.9%) |
31182 |
322747 |
|
87 |
GAD7.Severity |
Mean (sd) : 2.1 (3.3) |
22 distinct values |
116922 |
237007 |
|
88 |
GAD.Ever |
1. 0 |
72566 (90.0%) |
80662 |
273267 |
|
89 |
GAD.Current |
1. 0 |
78789 (97.7%) |
80660 |
273269 |
|
90 |
AUDIT.Score |
Mean (sd) : 5.1 (4.2) |
41 distinct values |
116985 |
236944 |
|
91 |
Alcohol.Use.Disorder |
1. 0 |
45384 (64.1%) |
70779 |
283150 |
|
92 |
Cannabis.Ever |
1. 0 |
89977 (77.0%) |
116814 |
237115 |
|
93 |
Cannabis.Daily |
1. 0 |
25231 (93.4%) |
27008 |
326921 |
|
94 |
Addiction.Ever.SelfReport |
1. 0 |
108501 (93.8%) |
115626 |
238303 |
|
95 |
Addiction.Ever |
1. 0 |
105450 (93.7%) |
112575 |
241354 |
|
96 |
Substance.Addiction.Ever |
1. 0 |
3321 (46.6%) |
7121 |
346808 |
|
97 |
Alcohol.Dependence.Ever |
1. 0 |
1900 (72.3%) |
2629 |
351300 |
|
98 |
Addiction.Current |
1. 0 |
2663 (53.8%) |
4953 |
348976 |
|
99 |
Trauma.Childhood |
1. 0 |
62363 (54.5%) |
114496 |
239433 |
|
100 |
Trauma.Adult |
1. 0 |
53207 (47.3%) |
112515 |
241414 |
|
101 |
Trauma.Catastrophic |
1. 0 |
57565 (49.2%) |
116960 |
236969 |
|
102 |
PTSD |
1. 0 |
109412 (93.9%) |
116546 |
237383 |
|
103 |
Self.Harm.Ever |
1. 0 |
111646 (95.7%) |
116653 |
237276 |
|
104 |
Not.Worth.Living |
Min : 0 |
0 : 81132 (69.3%) |
116985 |
236944 |
|
105 |
Self.Harm.Suicide.Attempt |
1. 0 |
2384 (49.5%) |
4818 |
349111 |
|
106 |
WellbeingScore |
Mean (sd) : 12.7 (2) |
15 distinct values |
113605 |
240324 |
|
107 |
NoSRConditions |
1. 0 |
39502 (33.8%) |
116985 |
236944 |
|
108 |
MHQ |
1. 1 |
116985 (100.0%) |
116985 |
236944 |
|
109 |
SRAnyMDX |
1. 0 |
80538 (68.8%) |
116985 |
236944 |
|
110 |
SRANpure |
1. 0 |
116339 (99.5%) |
116985 |
236944 |
|
111 |
SRBNpure |
1. 0 |
116627 (99.7%) |
116985 |
236944 |
|
112 |
SRBEDpure |
1. 0 |
116510 (99.6%) |
116985 |
236944 |
|
113 |
Corticoids |
1. 0 |
345620 (97.7%) |
353929 |
0 |
|
114 |
Antidiabetics |
1. 0 |
342197 (96.7%) |
353929 |
0 |
|
115 |
Diuretics |
1. 0 |
327642 (92.6%) |
353929 |
0 |
|
116 |
Gonadotropins |
1. 0 |
353541 (99.9%) |
353929 |
0 |
|
117 |
Growth_Hormone |
1. 0 |
353811 (100.0%) |
353929 |
0 |
|
118 |
AntiretroviralMed |
1. 0 |
353661 (99.9%) |
353929 |
0 |
|
119 |
HRT_Contraceptives |
1. 0 |
339072 (95.8%) |
353929 |
0 |
|
120 |
Antiosteoporotics |
1. 0 |
348232 (98.4%) |
353929 |
0 |
|
121 |
Testosterone |
1. 0 |
353534 (99.9%) |
353929 |
0 |
|
122 |
Thyreostatics |
1. 0 |
335216 (94.7%) |
353929 |
0 |
|
123 |
AntitubercularAntileproticMed |
1. 0 |
353870 (100.0%) |
353929 |
0 |
|
124 |
Antidepressants |
1. 0 |
329351 (93.1%) |
353929 |
0 |
|
125 |
Antineoplastics |
1. 0 |
349038 (98.6%) |
353929 |
0 |
|
126 |
Antipsychotics |
1. 0 |
352026 (99.5%) |
353929 |
0 |
|
127 |
Anxiolytics |
1. 0 |
350228 (99.0%) |
353929 |
0 |
|
128 |
MetabolicMed |
1. 0 |
259896 (73.4%) |
353929 |
0 |
|
129 |
PsychotropicMed |
1. 0 |
318765 (90.1%) |
353929 |
0 |
|
130 |
DxCancerAny |
1. 0 |
302293 (85.4%) |
353929 |
0 |
|
131 |
DxBodyCompAny |
1. 0 |
237204 (67.0%) |
353929 |
0 |
|
132 |
CancerRegister |
1. 0 |
307896 (87.0%) |
353929 |
0 |
|
133 |
DxConnectiveTissue |
1. 0 |
351616 (99.4%) |
353929 |
0 |
|
134 |
DxDiabetes |
1. 0 |
337737 (95.4%) |
353929 |
0 |
|
135 |
DxEndocrine |
1. 0 |
351906 (99.4%) |
353929 |
0 |
|
136 |
DxGlucose |
1. 0 |
353188 (99.8%) |
353929 |
0 |
|
137 |
DxHIV |
1. 0 |
353806 (100.0%) |
353929 |
0 |
|
138 |
DxIBD |
1. 0 |
338439 (95.6%) |
353929 |
0 |
|
139 |
DxIBS |
1. 0 |
349745 (98.8%) |
353929 |
0 |
|
140 |
DxLiver |
1. 0 |
349865 (98.9%) |
353929 |
0 |
|
141 |
DxPsy |
1. 0 |
327808 (92.6%) |
353929 |
0 |
|
142 |
DxMetabolic |
1. 0 |
317904 (89.8%) |
353929 |
0 |
|
143 |
DxMuscles |
1. 0 |
353033 (99.8%) |
353929 |
0 |
|
144 |
DxPancreatitis |
1. 0 |
353515 (99.9%) |
353929 |
0 |
|
145 |
DxThyroid |
1. 0 |
340750 (96.3%) |
353929 |
0 |
|
146 |
DxTuberculosis |
1. 0 |
353815 (100.0%) |
353929 |
0 |
|
147 |
F500 |
1. 0 |
353901 (100.0%) |
353929 |
0 |
|
148 |
F501 |
1. 0 |
353927 (100.0%) |
353929 |
0 |
|
149 |
F502 |
1. 0 |
353914 (100.0%) |
353929 |
0 |
|
150 |
F505 |
1. 0 |
353928 (100.0%) |
353929 |
0 |
|
151 |
F508 |
1. 0 |
353928 (100.0%) |
353929 |
0 |
|
152 |
F509 |
1. 0 |
353914 (100.0%) |
353929 |
0 |
|
153 |
SRAnyIllnessBC |
1. 0 |
267243 (75.5%) |
353929 |
0 |
|
154 |
SRAnyIllnessBC.0 |
1. 0 |
270118 (76.3%) |
353929 |
0 |
|
155 |
SRAnyIllnessBC.1 |
1. 0 |
350782 (99.1%) |
353929 |
0 |
|
156 |
SRAnyIllnessBC.2 |
1. 0 |
351284 (99.2%) |
353929 |
0 |
|
157 |
SRAnyPsyIllness |
1. 0 |
327619 (92.6%) |
353929 |
0 |
|
158 |
SRAnyNeuroIllness |
1. 0 |
345069 (97.5%) |
353929 |
0 |
|
159 |
SRAnyCancerBC |
1. 0 |
325632 (92.0%) |
353929 |
0 |
|
160 |
SRAnyCancer |
1. 0 |
324056 (91.6%) |
353929 |
0 |
|
161 |
SRAnyCancerNeuro |
1. 0 |
353740 (100.0%) |
353929 |
0 |
|
162 |
SRHeartAttack |
1. 1 |
8119 (100.0%) |
8119 |
345810 |
|
163 |
SRAngina |
1. 1 |
11004 (100.0%) |
11004 |
342925 |
|
164 |
SRStroke |
1. 1 |
5263 (100.0%) |
5263 |
348666 |
|
165 |
SRHighBloodPressure |
1. 1 |
94520 (100.0%) |
94520 |
259409 |
|
166 |
ICD.autoimmune |
Min : 0 |
0 : 326463 (92.2%) |
353929 |
0 |
|
167 |
ICD.autoinflammatory |
Min : 0 |
0 : 252761 (71.4%) |
353929 |
0 |
|
168 |
ICD.immunodeficiency |
Min : 0 |
0 : 353555 (99.9%) |
353929 |
0 |
|
169 |
ICD.memory |
Min : 0 |
0 : 350791 (99.1%) |
353929 |
0 |
|
170 |
ICD.metabolic |
Min : 0 |
0 : 298681 (84.4%) |
353929 |
0 |
|
171 |
ICD.psychiatric |
Min : 0 |
0 : 327808 (92.6%) |
353929 |
0 |
|
172 |
ICD.immunodysregulation |
Min : 0 |
0 : 248024 (70.1%) |
353929 |
0 |
|
173 |
SRAnorexiaNervosa.numeric |
Min : 0 |
0 : 116334 (99.4%) |
116985 |
236944 |
|
174 |
F500.numeric |
Min : 0 |
0 : 353901 (100.0%) |
353929 |
0 |
|
175 |
F501.numeric |
Min : 0 |
0 : 353927 (100.0%) |
353929 |
0 |
|
176 |
F502.numeric |
Min : 0 |
0 : 353914 (100.0%) |
353929 |
0 |
|
177 |
F505.numeric |
Min : 0 |
0 : 353928 (100.0%) |
353929 |
0 |
|
178 |
F508.numeric |
Min : 0 |
0 : 353928 (100.0%) |
353929 |
0 |
|
179 |
F509.numeric |
Min : 0 |
0 : 353914 (100.0%) |
353929 |
0 |
|
180 |
SRBulimiaNervosa.numeric |
Min : 0 |
0 : 116622 (99.7%) |
116985 |
236944 |
|
181 |
SRBingeEating.numeric |
Min : 0 |
0 : 116505 (99.6%) |
116985 |
236944 |
|
182 |
AN.count |
1. 0 |
116332 (99.4%) |
117003 |
236926 |
|
183 |
BN.count |
1. 0 |
116622 (99.7%) |
116994 |
236935 |
|
184 |
BED.count |
1. 0 |
116505 (99.6%) |
116985 |
236944 |
|
185 |
EDNOS.count |
1. 0 |
353914 (100.0%) |
353929 |
0 |
|
186 |
PUR.count |
1. 0 |
353928 (100.0%) |
353929 |
0 |
|
187 |
Pica.count |
1. 0 |
353928 (100.0%) |
353929 |
0 |
|
188 |
AN.count.numeric |
Min : 0 |
0 : 116332 (99.4%) |
117003 |
236926 |
|
189 |
BN.count.numeric |
Min : 0 |
0 : 116622 (99.7%) |
116994 |
236935 |
|
190 |
BED.count.numeric |
Min : 0 |
0 : 116505 (99.6%) |
116985 |
236944 |
|
191 |
EDNOS.count.numeric |
Min : 0 |
0 : 353914 (100.0%) |
353929 |
0 |
|
192 |
PUR.count.numeric |
Min : 0 |
0 : 353928 (100.0%) |
353929 |
0 |
|
193 |
Pica.count.numeric |
Min : 0 |
0 : 353928 (100.0%) |
353929 |
0 |
|
194 |
AN |
1. 0 |
115679 (99.4%) |
116350 |
237579 |
|
195 |
AN.numeric |
Min : 0 |
0 : 115679 (99.4%) |
116350 |
237579 |
|
196 |
BN |
1. 0 |
115679 (99.7%) |
116051 |
237878 |
|
197 |
BN.numeric |
Min : 0 |
0 : 115679 (99.7%) |
116051 |
237878 |
|
198 |
BED |
1. 0 |
115679 (99.6%) |
116159 |
237770 |
|
199 |
BED.numeric |
Min : 0 |
0 : 115679 (99.6%) |
116159 |
237770 |
|
200 |
ANpure.count |
Min : 0 |
0 : 116487 (99.5%) |
117023 |
236906 |
|
201 |
ANpure |
1. 0 |
115679 (99.5%) |
116215 |
237714 |
|
202 |
ANpure.numeric |
Min : 0 |
0 : 115679 (99.5%) |
116215 |
237714 |
|
203 |
BNpure.count |
Min : 0 |
0 : 116804 (99.8%) |
117023 |
236906 |
|
204 |
BNpure |
1. 0 |
115679 (99.8%) |
115898 |
238031 |
|
205 |
BNpure.numeric |
Min : 0 |
0 : 115679 (99.8%) |
115898 |
238031 |
|
206 |
BEDpure.count |
Min : 0 |
0 : 116623 (99.7%) |
117023 |
236906 |
|
207 |
BEDpure |
1. 0 |
115679 (99.7%) |
116079 |
237850 |
|
208 |
BEDpure.numeric |
Min : 0 |
0 : 115679 (99.7%) |
116079 |
237850 |
|
209 |
ED |
1. 0 |
115679 (98.9%) |
117022 |
236907 |
|
210 |
ED.control |
Min : 0 |
0 : 115681 (98.9%) |
117011 |
236918 |
|
211 |
PC1 |
Mean (sd) : 0 (0) |
350118 distinct values |
353929 |
0 |
|
212 |
PC2 |
Mean (sd) : 0 (0) |
347369 distinct values |
353929 |
0 |
|
213 |
PC3 |
Mean (sd) : 0 (0) |
350875 distinct values |
353929 |
0 |
|
214 |
PC4 |
Mean (sd) : 0 (0) |
351213 distinct values |
353929 |
0 |
|
215 |
PC5 |
Mean (sd) : 0 (0) |
351068 distinct values |
353929 |
0 |
|
216 |
PC6 |
Mean (sd) : 0 (0) |
350560 distinct values |
353929 |
0 |
|
217 |
PC7 |
Mean (sd) : 0 (0) |
350903 distinct values |
353929 |
0 |
|
218 |
PC8 |
Mean (sd) : 0 (0) |
351009 distinct values |
353929 |
0 |
|
219 |
PC9 |
Mean (sd) : 0 (0) |
351079 distinct values |
353929 |
0 |
|
220 |
PC10 |
Mean (sd) : 0 (0) |
350960 distinct values |
353929 |
0 |
|
221 |
Array |
1. UKBB |
315486 (89.1%) |
353929 |
0 |
|
222 |
Batch |
1. Batch_b001 |
3381 ( 1.0%) |
353929 |
0 |
|
223 |
Menopause_bin |
1. 0 |
228955 (64.7%) |
353929 |
0 |
Export data
# Export the complete data set with a header row, quoted strings and no row
# names. No `sep` is given, so write.table() uses its default field separator
# (a single space).
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned; paste0() replaces paste(..., sep = "").
write.table(complete,
  file = paste0("data/BCpheno_complete", date, ".txt"),
  quote = TRUE,
  col.names = TRUE,
  row.names = FALSE)
# Export only the first two columns of `complete` to the "_IDs" file, using
# the same settings.
write.table(complete[, 1:2],
  file = paste0("data/BCpheno_complete_IDs", date, ".txt"),
  quote = TRUE,
  col.names = TRUE,
  row.names = FALSE)
Healthy subset: without medication, cancer, somatic disease influencing BC, or psychiatric disorder; participants who did not answer the smoking or alcohol questions are excluded
# Healthy subset of `complete`. filter() keeps a row only if every condition
# listed below is TRUE; a row for which a condition evaluates to NA is dropped.
healthy <- filter(
  complete,
  # Smoking and alcohol questions answered.
  # NOTE(review): in the descriptives of `complete`, Tobacco_current and
  # Alcohol_frequency are NA exactly as often as their *_orig columns hold -3,
  # so these comparisons appear to exclude those rows via NA - confirm.
  Tobacco_current != -3,
  Alcohol_frequency != -3,
  # SRAnyMDX is 0, or missing (rows without a value are retained)
  SRAnyMDX == 0 | is.na(SRAnyMDX),
  # All remaining medication / diagnosis / self-report indicators must be 0
  MetabolicMed == 0,
  DxCancerAny == 0,
  DxBodyCompAny == 0,
  SRAnyIllnessBC == 0,
  SRAnyCancerBC == 0
)
#healthy <- complete %>%
# filter((is.na(NoSRConditions) | NoSRConditions == 1) &
# MetabolicMed == 0 &
# DxCancerAny == 0 &
# DxBodyCompAny == 0 &
# SRAnyIllnessBC == 0 &
# SRAnyCancer == 0)
dim(healthy)
[1] 155938 224
# 155961 321 OLD
# 191162 old without MHQ
## 173374 new excluding participants who endorsed a psychiatric disorder via MHQ
## 155961 new excluding, ICD, cancer register, self-report, MHQ
Female and male subset
### Female participants of the healthy subset
healthy_female <- healthy[which(healthy$Gender == "Female"), ]
dim(healthy_female)
[1] 70686 224
# 70700 321
### Male participants of the healthy subset
healthy_male <- healthy[which(healthy$Gender == "Male"), ]
dim(healthy_male)
[1] 85252 224
# 85261 321
Descriptives: Healthy (150K)
# Per-variable descriptive table for `healthy`, leaving out its first column.
summarytools::dfSummary(
  healthy[, -1],
  bootstrap.css = FALSE,      # not needed: the document theme already has it
  plain.ascii = FALSE,        # markdown-style output rather than plain ASCII
  style = "grid",             # grid table layout
  dfSummary.silent = TRUE,    # no messages about temporary files
  footnote = NA,              # no footnote, to keep the output minimal
  subtitle.emphasis = FALSE,
  graph.magnif = 0.75,
  tmp.img.dir = "/tmp"
)
Dimensions: 155938 x 223
Duplicates: 0
No | Variable | Stats / Values | Freqs (% of Valid) | Graph | Valid | Missing |
---|---|---|---|---|---|---|
1 |
Passed_GQC |
1. 0 |
0 ( 0.0%) |
155938 |
0 |
|
2 |
EuropeanGenetic |
1 distinct value |
1 : 155938 (100.0%) |
155938 |
0 |
|
3 |
Gender |
1. Female |
70686 (45.3%) |
155938 |
0 |
|
4 |
Age |
Mean (sd) : 54.9 (8.1) |
36 distinct values |
155938 |
0 |
|
5 |
Height |
Mean (sd) : 170.4 (9.3) |
486 distinct values |
155938 |
0 |
|
6 |
Weight |
Mean (sd) : 78.1 (15.1) |
1120 distinct values |
155938 |
0 |
|
7 |
BMI |
Mean (sd) : 26.8 (4.2) |
392 distinct values |
155938 |
0 |
|
8 |
WC |
Mean (sd) : 89.4 (12.6) |
577 distinct values |
155938 |
0 |
|
9 |
HC |
Mean (sd) : 102.5 (8.1) |
439 distinct values |
155938 |
0 |
|
10 |
BFPC |
Mean (sd) : 29.3 (8.2) |
516 distinct values |
155938 |
0 |
|
11 |
FM |
Mean (sd) : 23.1 (8.5) |
696 distinct values |
155938 |
0 |
|
12 |
FFM |
Mean (sd) : 55.1 (11.6) |
666 distinct values |
155938 |
0 |
|
13 |
SES |
Mean (sd) : -1.7 (2.9) |
38382 distinct values |
155938 |
0 |
|
14 |
Tobacco_current_orig |
Mean (sd) : 0.1 (0.4) |
0 : 142121 (91.1%) |
155938 |
0 |
|
15 |
Alcohol_frequency_orig |
Mean (sd) : 2.7 (1.4) |
1 : 34825 (22.3%) |
155938 |
0 |
|
16 |
Centre |
1. 10003 |
122 ( 0.1%) |
155938 |
0 |
|
17 |
WHR |
Mean (sd) : 0.9 (0.1) |
5166 distinct values |
155938 |
0 |
|
18 |
Menopause_new |
1. 0 |
85252 (54.7%) |
155938 |
0 |
|
19 |
Pregnancy_no_NA |
1. 0 |
85252 (54.7%) |
155938 |
0 |
|
20 |
Alcohol_frequency |
1. Never |
6855 ( 4.4%) |
155938 |
0 |
|
21 |
Tobacco_current |
1. No |
142121 (91.1%) |
155938 |
0 |
|
22 |
WHRadjBMI |
Mean (sd) : 0 (0.1) |
65734 distinct values |
155938 |
0 |
|
23 |
WCadjBMI |
Mean (sd) : 0.5 (7.6) |
11849 distinct values |
155938 |
0 |
|
24 |
HCadjBMI |
Mean (sd) : 0.2 (4.4) |
9254 distinct values |
155938 |
0 |
|
25 |
WHRadjBFPC |
Mean (sd) : 0 (0.1) |
96394 distinct values |
155938 |
0 |
|
26 |
WCadjBFPC |
Mean (sd) : -0.1 (12.5) |
21070 distinct values |
155938 |
0 |
|
27 |
HCadjBFPC |
Mean (sd) : 0.5 (7) |
16122 distinct values |
155938 |
0 |
|
28 |
FFMadjHeight |
Mean (sd) : -0.1 (6.6) |
19878 distinct values |
155938 |
0 |
|
29 |
BFPCadjHeight |
Mean (sd) : -1.2 (7) |
21482 distinct values |
155938 |
0 |
|
30 |
BFPC_trunk |
Mean (sd) : 29.5 (7.6) |
570 distinct values |
155841 |
97 |
|
31 |
FM_trunk |
Mean (sd) : 13 (4.8) |
399 distinct values |
155833 |
105 |
|
32 |
FFM_trunk |
Mean (sd) : 30.6 (6.1) |
375 distinct values |
155803 |
135 |
|
33 |
Impedance_wb |
Mean (sd) : 592.2 (85.7) |
644 distinct values |
155929 |
9 |
|
34 |
Ethnicity |
1. European |
154868 (99.7%) |
155385 |
553 |
|
35 |
Region |
1. 1 |
0 ( 0.0%) |
154340 |
1598 |
|
36 |
Income |
1. -3 |
13878 ( 8.9%) |
155664 |
274 |
|
37 |
Birth_weight |
Mean (sd) : 3.4 (0.6) |
289 distinct values |
90603 |
65335 |
|
38 |
Menarche_age_at |
Mean (sd) : 12.6 (2.8) |
22 distinct values |
70686 |
85252 |
|
39 |
Contraceptive_ever |
1. -3 |
63 ( 0.1%) |
70686 |
85252 |
|
40 |
HRT |
1. -3 |
21 ( 0.0%) |
70686 |
85252 |
|
41 |
Breastfed |
1. -3 |
46 ( 0.0%) |
155938 |
0 |
|
42 |
VAT |
Mean (sd) : 1215.2 (877.1) |
1148 distinct values |
1520 |
154418 |
|
43 |
BMC_wb |
Mean (sd) : 2745.7 (580.4) |
1087 distinct values |
1536 |
154402 |
|
44 |
BMD_total |
Mean (sd) : 1.2 (0.2) |
562 distinct values |
1536 |
154402 |
|
45 |
BipolarInitialQ |
1. 1 |
62 (26.2%) |
237 |
155701 |
|
46 |
Age.At.MHQ |
Mean (sd) : 63 (7.7) |
35 distinct values |
47666 |
108272 |
|
47 |
Migrant.Status |
1. 0 |
44729 (93.9%) |
47656 |
108282 |
|
48 |
Highest.Qualification |
1. ALevel |
6507 (13.7%) |
47477 |
108461 |
|
49 |
Smoker |
1. Current |
2923 ( 6.1%) |
47666 |
108272 |
|
50 |
Longstanding.Illness |
1. 0 |
40506 (86.1%) |
47072 |
108866 |
|
51 |
Diabetes |
1. 0 |
47573 (99.9%) |
47623 |
108315 |
|
52 |
Cancer |
1. 0 |
47504 (99.7%) |
47632 |
108306 |
|
53 |
CVD |
1. 0 |
40499 (85.0%) |
47625 |
108313 |
|
54 |
Respiratory |
1. 0 |
33189 (69.7%) |
47649 |
108289 |
|
55 |
Neuroticism |
Mean (sd) : 3 (2.7) |
13 distinct values |
40810 |
115128 |
|
56 |
SRSocPhobia |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
57 |
SRSchizophrenia |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
58 |
SRPsychosisOther |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
59 |
SRPsychosisAny |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
60 |
SRPersonalityDisorder |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
61 |
SROtherPhobia |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
62 |
SRPanicAttacks |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
63 |
SROCD |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
64 |
SRManiaBIP |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
65 |
SRDepression |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
66 |
SRMood |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
67 |
SRBulimiaNervosa |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
68 |
SRBingeEating |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
69 |
SRASD |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
70 |
SRGADandOthers |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
71 |
SRAnorexiaNervosa |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
72 |
SREatingDisorderAny |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
73 |
SRAgoraphobia |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
74 |
SRAnxietyAny |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
75 |
SRADHD |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
76 |
SRPNTA |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
77 |
SmithDepression |
1. 0 |
11606 (92.4%) |
12559 |
143379 |
|
78 |
PHQ9.No.Info |
1. 0 |
47635 (99.9%) |
47666 |
108272 |
|
79 |
PHQ9.Screen |
1. 0 |
38732 (81.3%) |
47666 |
108272 |
|
80 |
PHQ9.Items |
1. 0 |
40089 (84.1%) |
47666 |
108272 |
|
81 |
PHQ9.Severity |
Mean (sd) : 1.9 (2.7) |
27 distinct values |
47666 |
108272 |
|
82 |
Depressed.Ever |
1. 0 |
36745 (88.4%) |
41574 |
114364 |
|
83 |
Depressed.Ever.Severe |
1. 0 |
41308 (99.4%) |
41574 |
114364 |
|
84 |
Recurrent.Depression |
1. 0 |
13220 (86.0%) |
15365 |
140573 |
|
85 |
Single.Depression |
1. 0 |
12751 (83.0%) |
15365 |
140573 |
|
86 |
SmithBipolar |
1. 0 |
12520 (99.7%) |
12559 |
143379 |
|
87 |
GAD7.Severity |
Mean (sd) : 1.4 (2.5) |
22 distinct values |
47647 |
108291 |
|
88 |
GAD.Ever |
1. 0 |
36416 (97.8%) |
37253 |
118685 |
|
89 |
GAD.Current |
1. 0 |
37084 (99.6%) |
37252 |
118686 |
|
90 |
AUDIT.Score |
Mean (sd) : 5.3 (4) |
34 distinct values |
47666 |
108272 |
|
91 |
Alcohol.Use.Disorder |
1. 0 |
16764 (60.1%) |
27916 |
128022 |
|
92 |
Cannabis.Ever |
1. 0 |
36380 (76.4%) |
47600 |
108338 |
|
93 |
Cannabis.Daily |
1. 0 |
10660 (94.5%) |
11286 |
144652 |
|
94 |
Addiction.Ever.SelfReport |
1. 0 |
45330 (96.0%) |
47245 |
108693 |
|
95 |
Addiction.Ever |
1. 0 |
44141 (95.8%) |
46056 |
109882 |
|
96 |
Substance.Addiction.Ever |
1. 0 |
1107 (57.8%) |
1914 |
154024 |
|
97 |
Alcohol.Dependence.Ever |
1. 0 |
478 (83.6%) |
572 |
155366 |
|
98 |
Addiction.Current |
1. 0 |
617 (50.5%) |
1222 |
154716 |
|
99 |
Trauma.Childhood |
1. 0 |
27978 (59.6%) |
46943 |
108995 |
|
100 |
Trauma.Adult |
1. 0 |
24536 (53.2%) |
46080 |
109858 |
|
101 |
Trauma.Catastrophic |
1. 0 |
27077 (56.8%) |
47662 |
108276 |
|
102 |
PTSD |
1. 0 |
46367 (97.5%) |
47551 |
108387 |
|
103 |
Self.Harm.Ever |
1. 0 |
46717 (98.2%) |
47591 |
108347 |
|
104 |
Not.Worth.Living |
Min : 0 |
0 : 37962 (79.6%) |
47666 |
108272 |
|
105 |
Self.Harm.Suicide.Attempt |
1. 0 |
568 (66.9%) |
849 |
155089 |
|
106 |
WellbeingScore |
Mean (sd) : 13.1 (1.8) |
15 distinct values |
46375 |
109563 |
|
107 |
NoSRConditions |
1. 0 |
1915 ( 4.0%) |
47666 |
108272 |
|
108 |
MHQ |
1. 1 |
47666 (100.0%) |
47666 |
108272 |
|
109 |
SRAnyMDX |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
110 |
SRANpure |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
111 |
SRBNpure |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
112 |
SRBEDpure |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
113 |
Corticoids |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
114 |
Antidiabetics |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
115 |
Diuretics |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
116 |
Gonadotropins |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
117 |
Growth_Hormone |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
118 |
AntiretroviralMed |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
119 |
HRT_Contraceptives |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
120 |
Antiosteoporotics |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
121 |
Testosterone |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
122 |
Thyreostatics |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
123 |
AntitubercularAntileproticMed |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
124 |
Antidepressants |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
125 |
Antineoplastics |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
126 |
Antipsychotics |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
127 |
Anxiolytics |
1. 0 |
155483 (99.7%) |
155938 |
0 |
|
128 |
MetabolicMed |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
129 |
PsychotropicMed |
1. 0 |
155483 (99.7%) |
155938 |
0 |
|
130 |
DxCancerAny |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
131 |
DxBodyCompAny |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
132 |
CancerRegister |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
133 |
DxConnectiveTissue |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
134 |
DxDiabetes |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
135 |
DxEndocrine |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
136 |
DxGlucose |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
137 |
DxHIV |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
138 |
DxIBD |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
139 |
DxIBS |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
140 |
DxLiver |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
141 |
DxPsy |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
142 |
DxMetabolic |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
143 |
DxMuscles |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
144 |
DxPancreatitis |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
145 |
DxThyroid |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
146 |
DxTuberculosis |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
147 |
F500 |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
148 |
F501 |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
149 |
F502 |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
150 |
F505 |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
151 |
F508 |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
152 |
F509 |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
153 |
SRAnyIllnessBC |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
154 |
SRAnyIllnessBC.0 |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
155 |
SRAnyIllnessBC.1 |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
156 |
SRAnyIllnessBC.2 |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
157 |
SRAnyPsyIllness |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
158 |
SRAnyNeuroIllness |
1. 0 |
153316 (98.3%) |
155938 |
0 |
|
159 |
SRAnyCancerBC |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
160 |
SRAnyCancer |
1. 0 |
155370 (99.6%) |
155938 |
0 |
|
161 |
SRAnyCancerNeuro |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
162 |
SRHeartAttack |
1. 1 |
1406 (100.0%) |
1406 |
154532 |
|
163 |
SRAngina |
1. 1 |
1701 (100.0%) |
1701 |
154237 |
|
164 |
SRStroke |
1. 1 |
1011 (100.0%) |
1011 |
154927 |
|
165 |
SRHighBloodPressure |
1. 1 |
26577 (100.0%) |
26577 |
129361 |
|
166 |
ICD.autoimmune |
Min : 0 |
0 : 153497 (98.4%) |
155938 |
0 |
|
167 |
ICD.autoinflammatory |
Min : 0 |
0 : 143545 (92.0%) |
155938 |
0 |
|
168 |
ICD.immunodeficiency |
Min : 0 |
0 : 155914 (100.0%) |
155938 |
0 |
|
169 |
ICD.memory |
Min : 0 |
0 : 155693 (99.8%) |
155938 |
0 |
|
170 |
ICD.metabolic |
1 distinct value |
0 : 155938 (100.0%) |
155938 |
0 |
|
171 |
ICD.psychiatric |
1 distinct value |
0 : 155938 (100.0%) |
155938 |
0 |
|
172 |
ICD.immunodysregulation |
Min : 0 |
0 : 142471 (91.4%) |
155938 |
0 |
|
173 |
SRAnorexiaNervosa.numeric |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
174 |
F500.numeric |
1 distinct value |
0 : 155938 (100.0%) |
155938 |
0 |
|
175 |
F501.numeric |
1 distinct value |
0 : 155938 (100.0%) |
155938 |
0 |
|
176 |
F502.numeric |
1 distinct value |
0 : 155938 (100.0%) |
155938 |
0 |
|
177 |
F505.numeric |
1 distinct value |
0 : 155938 (100.0%) |
155938 |
0 |
|
178 |
F508.numeric |
1 distinct value |
0 : 155938 (100.0%) |
155938 |
0 |
|
179 |
F509.numeric |
1 distinct value |
0 : 155938 (100.0%) |
155938 |
0 |
|
180 |
SRBulimiaNervosa.numeric |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
181 |
SRBingeEating.numeric |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
182 |
AN.count |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
183 |
BN.count |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
184 |
BED.count |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
185 |
EDNOS.count |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
186 |
PUR.count |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
187 |
Pica.count |
1. 0 |
155938 (100.0%) |
155938 |
0 |
|
188 |
AN.count.numeric |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
189 |
BN.count.numeric |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
190 |
BED.count.numeric |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
191 |
EDNOS.count.numeric |
1 distinct value |
0 : 155938 (100.0%) |
155938 |
0 |
|
192 |
PUR.count.numeric |
1 distinct value |
0 : 155938 (100.0%) |
155938 |
0 |
|
193 |
Pica.count.numeric |
1 distinct value |
0 : 155938 (100.0%) |
155938 |
0 |
|
194 |
AN |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
195 |
AN.numeric |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
196 |
BN |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
197 |
BN.numeric |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
198 |
BED |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
199 |
BED.numeric |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
200 |
ANpure.count |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
201 |
ANpure |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
202 |
ANpure.numeric |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
203 |
BNpure.count |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
204 |
BNpure |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
205 |
BNpure.numeric |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
206 |
BEDpure.count |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
207 |
BEDpure |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
208 |
BEDpure.numeric |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
209 |
ED |
1. 0 |
47666 (100.0%) |
47666 |
108272 |
|
210 |
ED.control |
1 distinct value |
0 : 47666 (100.0%) |
47666 |
108272 |
|
211 |
PC1 |
Mean (sd) : 0 (0) |
155204 distinct values |
155938 |
0 |
|
212 |
PC2 |
Mean (sd) : 0 (0) |
154615 distinct values |
155938 |
0 |
|
213 |
PC3 |
Mean (sd) : 0 (0) |
155352 distinct values |
155938 |
0 |
|
214 |
PC4 |
Mean (sd) : 0 (0) |
155402 distinct values |
155938 |
0 |
|
215 |
PC5 |
Mean (sd) : 0 (0) |
155375 distinct values |
155938 |
0 |
|
216 |
PC6 |
Mean (sd) : 0 (0) |
155296 distinct values |
155938 |
0 |
|
217 |
PC7 |
Mean (sd) : 0 (0) |
155341 distinct values |
155938 |
0 |
|
218 |
PC8 |
Mean (sd) : 0 (0) |
155366 distinct values |
155938 |
0 |
|
219 |
PC9 |
Mean (sd) : 0 (0) |
155388 distinct values |
155938 |
0 |
|
220 |
PC10 |
Mean (sd) : 0 (0) |
155345 distinct values |
155938 |
0 |
|
221 |
Array |
1. UKBB |
140193 (89.9%) |
155938 |
0 |
|
222 |
Batch |
1. Batch_b001 |
1499 ( 1.0%) |
155938 |
0 |
|
223 |
Menopause_bin |
1. 0 |
111489 (71.5%) |
155938 |
0 |
Export data
# Export the healthy subset: the full phenotype table plus a companion file
# holding only its first three columns.
# NOTE(review): `date` is expected to be a file-name suffix string defined
# earlier in the script -- confirm.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
write.table(
  healthy,
  file = paste0("data/BCpheno_healthy", date, ".txt"),
  quote = TRUE,
  col.names = TRUE,
  row.names = FALSE
)
write.table(
  healthy[, 1:3],
  file = paste0("data/BCpheno_healthy_IDs", date, ".txt"),
  quote = TRUE,
  col.names = TRUE,
  row.names = FALSE
)
# Cases: every row of GIDpheno flagged with an eating disorder
# (MHQ, or main ICD, or secondary ICD diagnosis).
EDcases <- filter(GIDpheno, ED == 1)
# Cases carry EDcontrol = 0
EDcases$EDcontrol <- 0
dim(EDcases)
[1] 1550 224
# Split the cases by gender, then record the size of each stratum
EDcases_female <- subset(EDcases, subset = Gender == "Female")
EDcases_male <- subset(EDcases, subset = Gender == "Male")
EDcases_female_N <- nrow(EDcases_female)
EDcases_male_N <- nrow(EDcases_male)
# Candidate controls drawn from GIDpheno. Conditions passed to filter() as
# separate arguments are combined with AND; rows where a condition evaluates
# to NA are dropped.
EDcontrols <- filter(
  GIDpheno,
  !is.na(MHQ),
  MHQ == 1,                 # has answered the mental health questionnaire (MHQ)
  SRAnyMDX == 0,
  PsychotropicMed == 0,     # no self-reported psychotropic medication
  DxPsy == 0,               # no ICD F code
  Tobacco_current != -3,    # exclude those who did not answer the tobacco question
  Alcohol_frequency != -3,  # exclude those who did not answer the alcohol question
  SRAnyPsyIllness == 0      # did not self-report any psychiatric disorder in the sign up questionnaire
)
dim(EDcontrols)
[1] 80986 223
# Check that no case is also a control. setdiff() keeps the case IIDs that do
# not occur among the control IIDs, so (assuming IIDs are unique) a count equal
# to nrow(EDcases) means there is no overlap.
length(setdiff(x = EDcases$IID, y = EDcontrols$IID))
[1] 1550
# Split the candidate controls by gender, then record how many are available
EDcontrols_female <- subset(EDcontrols, subset = Gender == "Female")
EDcontrols_male <- subset(EDcontrols, subset = Gender == "Male")
EDcontrols_female_N <- nrow(EDcontrols_female)
EDcontrols_male_N <- nrow(EDcontrols_male)
Sample controls in 10:1 ratio
# Number of controls to draw per case
c_c_ratio <- 10
# Target number of female controls: female case count times the ratio.
# This overwrites any earlier value of EDcontrols_female_N.
EDcontrols_female_N <- c_c_ratio * EDcases_female_N
EDcontrols_female_N
[1] 14300
# Target number of male controls: male case count times the ratio.
# This overwrites any earlier value of EDcontrols_male_N.
EDcontrols_male_N <- c_c_ratio * EDcases_male_N
EDcontrols_male_N
[1] 1200
# Draw EDcontrols_female_N female controls at random, without replacement.
# The seed is fixed so the draw is reproducible.
set.seed(23497)
EDcontrols_female_random <- dplyr::sample_n(
  tbl = EDcontrols_female,
  size = EDcontrols_female_N,
  replace = FALSE
)
dim(EDcontrols_female_random)
[1] 14300 223
# Draw EDcontrols_male_N male controls at random, without replacement.
# The seed is reset to the same value before this draw.
set.seed(23497)
EDcontrols_male_random <- dplyr::sample_n(
  tbl = EDcontrols_male,
  size = EDcontrols_male_N,
  replace = FALSE
)
dim(EDcontrols_male_random)
[1] 1200 223
# Stack the sampled female and male controls; controls carry EDcontrol = 1
EDcontrols_random <- rbind(
  EDcontrols_female_random,
  EDcontrols_male_random
)
EDcontrols_random$EDcontrol <- 1
## Combined analysis sample: cases first, sampled controls below
EDcases_controls <- rbind(
  EDcases,
  EDcontrols_random
)
dim(EDcases_controls)
[1] 17050 224
# Per-column summary of the columns named in EDcols for the combined sample
summary(EDcases_controls[, EDcols])
AN.count BN.count BED.count EDNOS.count PUR.count Pica.count
0 :16257 0 :16582 0 :16430 0:17027 0:17049 0:17050
1 : 768 1 : 423 1 : 561 1: 23 1: 1 1: 0
NA's: 25 NA's: 45 NA's: 59
AN BN BED ANpure BNpure
0 :15500 0 :15500 0 :15500 0 :15500 0 :15500
1 : 768 1 : 423 1 : 561 1 : 615 1 : 251
NA's: 782 NA's: 1127 NA's: 989 NA's: 935 NA's: 1299
BEDpure ED SRAnorexiaNervosa SRBulimiaNervosa SRBingeEating
0 :15500 0:15500 0 :16259 0 :16582 0 :16430
1 : 468 1: 1550 1 : 732 1 : 409 1 : 561
NA's: 1082 NA's: 59 NA's: 59 NA's: 59
SREatingDisorderAny SRANpure SRBNpure SRBEDpure
0 :15503 0 :16265 0 :16588 0 :16436
1 : 1488 1 : 726 1 : 403 1 : 555
NA's: 59 NA's: 59 NA's: 59 NA's: 59
# Write output and IID lists.
# Full tables are written quoted with a header row; ID lists are written as a
# bare column (no quotes, no header). Row names are never written.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
# NOTE(review): `date` is expected to be a file-name suffix string defined
# earlier in the script -- confirm.

# Cases: full tables
write.table(EDcases, file = paste0("EDcases/EDcases", date, ".txt"),
            quote = TRUE, col.names = TRUE, row.names = FALSE)
write.table(EDcases_female, file = paste0("EDcases/EDcases_female", date, ".txt"),
            quote = TRUE, col.names = TRUE, row.names = FALSE)
write.table(EDcases_male, file = paste0("EDcases/EDcases_male", date, ".txt"),
            quote = TRUE, col.names = TRUE, row.names = FALSE)
# Cases: IID lists
write.table(EDcases$IID, file = paste0("EDcases/EDcases_ids", date, ".txt"),
            quote = FALSE, col.names = FALSE, row.names = FALSE)
write.table(EDcases_female$IID, file = paste0("EDcases/EDcases_female_ids", date, ".txt"),
            quote = FALSE, col.names = FALSE, row.names = FALSE)
write.table(EDcases_male$IID, file = paste0("EDcases/EDcases_male_ids", date, ".txt"),
            quote = FALSE, col.names = FALSE, row.names = FALSE)
# Sampled controls: full tables
write.table(EDcontrols_random, file = paste0("EDcases/EDcontrols", date, ".txt"),
            quote = TRUE, col.names = TRUE, row.names = FALSE)
write.table(EDcontrols_female_random, file = paste0("EDcases/EDcontrols_female", date, ".txt"),
            quote = TRUE, col.names = TRUE, row.names = FALSE)
write.table(EDcontrols_male_random, file = paste0("EDcases/EDcontrols_male", date, ".txt"),
            quote = TRUE, col.names = TRUE, row.names = FALSE)
# Sampled controls: IID lists
write.table(EDcontrols_random$IID, file = paste0("EDcases/EDcontrols_ids", date, ".txt"),
            quote = FALSE, col.names = FALSE, row.names = FALSE)
write.table(EDcontrols_female_random$IID, file = paste0("EDcases/EDcontrols_female_ids", date, ".txt"),
            quote = FALSE, col.names = FALSE, row.names = FALSE)
write.table(EDcontrols_male_random$IID, file = paste0("EDcases/EDcontrols_male_ids", date, ".txt"),
            quote = FALSE, col.names = FALSE, row.names = FALSE)
# Cases and controls combined: full table and IID list
write.table(EDcases_controls, file = paste0("EDcases/EDcases_controls", date, ".txt"),
            quote = TRUE, col.names = TRUE, row.names = FALSE)
write.table(EDcases_controls$IID, file = paste0("EDcases/EDcases_controls_ids", date, ".txt"),
            quote = FALSE, col.names = FALSE, row.names = FALSE)
Eating disorders crosstables for MPRS
# Cross-tabulate the AN column (rows) against the BN column (columns) of the
# combined case/control sample.
ctable(x = EDcases_controls$AN, y = EDcases_controls$BN)
Cross-Tabulation, Row Proportions
AN * BN
Data Frame: EDcases_controls
------- ---- ---------------- ------------- -------------- ----------------
BN 0 1 <NA> Total
AN
0 15500 (100.0%) 0 ( 0.0%) 0 ( 0.0%) 15500 (100.0%)
1 0 ( 0.0%) 126 (16.4%) 642 (83.6%) 768 (100.0%)
<NA> 0 ( 0.0%) 297 (38.0%) 485 (62.0%) 782 (100.0%)
Total 15500 ( 90.9%) 423 ( 2.5%) 1127 ( 6.6%) 17050 (100.0%)
------- ---- ---------------- ------------- -------------- ----------------
# Cross-tabulate the AN column (rows) against the BED column (columns) of the
# combined case/control sample.
ctable(x = EDcases_controls$AN, y = EDcases_controls$BED)
Cross-Tabulation, Row Proportions
AN * BED
Data Frame: EDcases_controls
------- ----- ---------------- ------------- ------------- ----------------
BED 0 1 <NA> Total
AN
0 15500 (100.0%) 0 ( 0.0%) 0 ( 0.0%) 15500 (100.0%)
1 0 ( 0.0%) 46 ( 6.0%) 722 (94.0%) 768 (100.0%)
<NA> 0 ( 0.0%) 515 (65.9%) 267 (34.1%) 782 (100.0%)
Total 15500 ( 90.9%) 561 ( 3.3%) 989 ( 5.8%) 17050 (100.0%)
------- ----- ---------------- ------------- ------------- ----------------
# Cross-tabulate the BN column (rows) against the BED column (columns) of the
# combined case/control sample.
ctable(x = EDcases_controls$BN, y = EDcases_controls$BED)
Cross-Tabulation, Row Proportions
BN * BED
Data Frame: EDcases_controls
------- ----- ---------------- ------------- ------------- ----------------
BED 0 1 <NA> Total
BN
0 15500 (100.0%) 0 ( 0.0%) 0 ( 0.0%) 15500 (100.0%)
1 0 ( 0.0%) 68 (16.1%) 355 (83.9%) 423 (100.0%)
<NA> 0 ( 0.0%) 493 (43.7%) 634 (56.3%) 1127 (100.0%)
Total 15500 ( 90.9%) 561 ( 3.3%) 989 ( 5.8%) 17050 (100.0%)
------- ----- ---------------- ------------- ------------- ----------------
Phenotype file for multi-polygenic risk scoring
# Reduced phenotype table for the combined case/control sample
EDcases_red <- select(
  EDcases_controls,
  IID,
  Age, BMI, Gender, SES, Ethnicity, BMD_total, BFPC,
  PC1, PC2, PC3, PC4, PC5, PC6, PC7, PC8, PC9, PC10,
  AN, BN, BED, ED, EDcontrol, MHQ,
  ANpure, BNpure, BEDpure,
  PsychotropicMed, Antidepressants, Anxiolytics, Antipsychotics,
  DxCancerAny
)
# MPRS input: identifier, diagnosis columns, gender and PC1-PC6
MPRSUKB <- select(
  EDcases_controls,
  IID,
  AN, BN, BED, ED,
  Gender,
  PC1, PC2, PC3, PC4, PC5, PC6
)
# FID is a copy of IID
MPRSUKB$FID <- MPRSUKB$IID
# Put FID and IID first; all remaining columns keep their order
MPRSUKB <- select(MPRSUKB, FID, IID, everything())
dim(MPRSUKB)
[1] 17050 13
# Recode gender: "Female" is relabelled 0 and "Male" is relabelled 1
MPRSUKB$Gender <- factor(
  MPRSUKB$Gender,
  levels = c("Female", "Male"),
  labels = c(0, 1)
)
# Write the MPRS phenotype file: tab-separated, unquoted, header row, no row
# names. TRUE/FALSE are spelled out because T and F are reassignable.
# NOTE(review): `date` is expected to be a file-name suffix string defined
# earlier in the script -- confirm.
write.table(
  MPRSUKB,
  file = paste0("data/EDcases_controls_mprs", date, ".txt"),
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE,
  sep = "\t"
)
Descriptives: MPRS
# Summarise every column of MPRSUKB except the first one (dropped by [,-1]).
# The remaining arguments control how the summary table is rendered.
summarytools::dfSummary(MPRSUKB[,-1],
bootstrap.css = FALSE, # Already part of the theme so no need for it
plain.ascii = FALSE, # One of the essential settings
style = "grid", # Likewise one of the essential settings
dfSummary.silent = TRUE, # Suppresses messages about temporary files
footnote = NA, # Keeping the results minimalistic
subtitle.emphasis = FALSE,
graph.magnif = 0.75,
tmp.img.dir = "/tmp")
Dimensions: 17050 x 12
Duplicates: 0
No | Variable | Stats / Values | Freqs (% of Valid) | Graph | Valid | Missing |
---|---|---|---|---|---|---|
1 |
IID |
Mean (sd) : 3515252.3 (1447368.2) |
17050 distinct values |
17050 |
0 |
|
2 |
AN |
1. 0 |
15500 (95.3%) |
16268 |
782 |
|
3 |
BN |
1. 0 |
15500 (97.3%) |
15923 |
1127 |
|
4 |
BED |
1. 0 |
15500 (96.5%) |
16061 |
989 |
|
5 |
ED |
1. 0 |
15500 (90.9%) |
17050 |
0 |
|
6 |
Gender |
1. 0 |
15730 (92.3%) |
17050 |
0 |
|
7 |
PC1 |
Mean (sd) : 0 (0) |
17042 distinct values |
17050 |
0 |
|
8 |
PC2 |
Mean (sd) : 0 (0) |
17030 distinct values |
17050 |
0 |
|
9 |
PC3 |
Mean (sd) : 0 (0) |
17039 distinct values |
17050 |
0 |
|
10 |
PC4 |
Mean (sd) : 0 (0) |
17043 distinct values |
17050 |
0 |
|
11 |
PC5 |
Mean (sd) : 0 (0) |
17042 distinct values |
17050 |
0 |
|
12 |
PC6 |
Mean (sd) : 0 (0) |
17040 distinct values |
17050 |
0 |
Extract data for diet questionnaire & inflammation
# Column names to extract for the diet questionnaire & inflammation analysis.
# Each name is listed exactly once: earlier versions repeated "Corticoids",
# "Testosterone", "Antidepressants" and "Antipsychotics", which would yield
# duplicated columns if the vector were used for base-R indexing.
autoimmune.diet.variables <- c(
  # Identifier, demographics and anthropometry
  "IID",
  "Gender",
  "Age",
  "Height",
  "Weight",
  "BMI",
  "WC",
  "HC",
  "BFPC",
  "FM",
  "FFM",
  "SES",
  "Highest.Qualification",
  "Ethnicity",
  "Centre",
  "WHR",
  "Pregnancy_no_NA",
  "Menopause_new",
  "Alcohol_frequency",
  "Tobacco_current",
  "Smoker",
  "WHRadjBMI",
  # Self-reported eating disorders
  "SRANpure",
  "SRBNpure",
  "SRBEDpure",
  # Medication
  "MetabolicMed",
  "Corticoids",
  "Antidiabetics",
  "Diuretics",
  "Gonadotropins",
  "Growth_Hormone",
  "AntiretroviralMed",
  "HRT_Contraceptives",
  "Antiosteoporotics",
  "Testosterone",
  "Thyreostatics",
  "AntitubercularAntileproticMed",
  "Antidepressants",
  "Antineoplastics",
  "Antipsychotics",
  "PsychotropicMed",
  "Anxiolytics",
  # Diagnoses
  "DxCancerAny",
  "DxBodyCompAny",
  "DxConnectiveTissue",
  "DxDiabetes",
  "DxEndocrine",
  "DxGlucose",
  "DxHIV",
  "DxIBD",
  "DxIBS",
  "DxLiver",
  "DxPsy",
  "DxMetabolic",
  "DxMuscles",
  "DxPancreatitis",
  "DxThyroid",
  "DxTuberculosis",
  # Trauma and mental health questionnaire scores
  "Trauma.Childhood",
  "Trauma.Adult",
  "Trauma.Catastrophic",
  "PTSD",
  "PHQ9.Severity",
  # ICD-derived groups
  "ICD.autoimmune",
  "ICD.autoinflammatory",
  "ICD.immunodeficiency",
  "ICD.memory",
  "ICD.metabolic",
  "ICD.psychiatric",
  "ICD.immunodysregulation",
  # Self-reported illnesses
  "SRAnyIllnessBC",
  "SRAnyPsyIllness",
  "SRAnyNeuroIllness",
  "SRAnyCancerNeuro",
  "SRStroke",
  # Eating disorder case definitions
  "AN",
  "BN",
  "BED",
  "ANpure",
  "BNpure",
  "BEDpure",
  # Principal components and ancestry flag
  "PC1",
  "PC2",
  "PC3",
  "PC4",
  "PC5",
  "PC6",
  "EuropeanGenetic"
)
# Extract the listed columns. all_of() marks autoimmune.diet.variables as an
# external character vector of column names, so it can never be mistaken for a
# column of the data, and a missing column raises an error.
data.autoimmune.diet <- GID_pheno_all_genetic %>%
  select(all_of(autoimmune.diet.variables))
colnames(data.autoimmune.diet)
[1] "IID" "Gender"
[3] "Age" "Height"
[5] "Weight" "BMI"
[7] "WC" "HC"
[9] "BFPC" "FM"
[11] "FFM" "SES"
[13] "Highest.Qualification" "Ethnicity"
[15] "Centre" "WHR"
[17] "Pregnancy_no_NA" "Menopause_new"
[19] "Alcohol_frequency" "Tobacco_current"
[21] "Smoker" "WHRadjBMI"
[23] "SRANpure" "SRBNpure"
[25] "SRBEDpure" "MetabolicMed"
[27] "Corticoids" "Antidiabetics"
[29] "Diuretics" "Gonadotropins"
[31] "Growth_Hormone" "AntiretroviralMed"
[33] "HRT_Contraceptives" "Antiosteoporotics"
[35] "Testosterone" "Thyreostatics"
[37] "AntitubercularAntileproticMed" "Antidepressants"
[39] "Antineoplastics" "Antipsychotics"
[41] "PsychotropicMed" "Anxiolytics"
[43] "DxCancerAny" "DxBodyCompAny"
[45] "DxConnectiveTissue" "DxDiabetes"
[47] "DxEndocrine" "DxGlucose"
[49] "DxHIV" "DxIBD"
[51] "DxIBS" "DxLiver"
[53] "DxPsy" "DxMetabolic"
[55] "DxMuscles" "DxPancreatitis"
[57] "DxThyroid" "DxTuberculosis"
[59] "Trauma.Childhood" "Trauma.Adult"
[61] "Trauma.Catastrophic" "PTSD"
[63] "PHQ9.Severity" "ICD.autoimmune"
[65] "ICD.autoinflammatory" "ICD.immunodeficiency"
[67] "ICD.memory" "ICD.metabolic"
[69] "ICD.psychiatric" "ICD.immunodysregulation"
[71] "SRAnyIllnessBC" "SRAnyPsyIllness"
[73] "SRAnyNeuroIllness" "SRAnyCancerNeuro"
[75] "SRStroke" "AN"
[77] "BN" "BED"
[79] "ANpure" "BNpure"
[81] "BEDpure" "PC1"
[83] "PC2" "PC3"
[85] "PC4" "PC5"
[87] "PC6" "EuropeanGenetic"
# Save the extracted data set under its own name. save() takes the objects to
# store as unnamed arguments; it has no `object` parameter.
# NOTE(review): `date` is expected to be a file-name suffix string defined
# earlier in the script -- confirm.
save(data.autoimmune.diet, file = paste0("data/data.autoimmune.diet", date, ".Rdata"))