This is code for looping through the effects of IGF1 on metabolites and the effects of metabolites on IGF1. Metabolites have been log-transformed and scaled and the linear models adjusted using robust standard errors with clustering by ‘centre’. I included ‘casecontrol’ in the models as a covariate, but only to hold the space in the code so that it can be easily modified. We could run stratified analyses or adjust for any relevant covariates. I did not transform ‘igf1’.

install.packages('lattice', repos='http://cran.r-project.org')
install.packages('readr',repos='http://cran.r-project.org')
install.packages('readxl',repos='http://cran.r-project.org')
install.packages('sandwich',repos='http://cran.r-project.org')
install.packages('lmtest',  repos='http://cran.r-project.org')
library('lattice')
library('readr')
library('readxl')
library('sandwich')
library('lmtest')

Basic correlation between IGF1 and 228 metabolites

# Read-in the data

vitD_met <- read.csv("C:/Users/ca16591/Dropbox/Bristol/vitD_mets2.csv")
metnames=colnames(vitD_met)[4:230]
colnames(vitD_met)

# Log-transform and scale the metabolites

logtransform<-function(vitD_mets,metnames)  
{   
  tx=vitD_mets[metnames]    
  tx=apply(tx,2,function(x){if (min(x,na.rm=T)==0) x=x+1 else x })  
  tx=log(tx)    
  vitD_mets[,metnames]=tx   
  vitD_mets
}   

mets_logged=logtransform(vitD_met,metnames)

mets_logged.scaled=mets_logged
mets_logged.scaled[,metnames]=scale(mets_logged.scaled[,metnames])

# Select just the metabolites and IGF1 variable
new=data.frame(mets_logged.scaled[-c(231:331)], mets_logged.scaled$igf1,mets_logged.scaled$igf2)
new=new[-c(237:263)]
new=new[-c(232:236)]
new=new[-c(1:2)]
colnames(new)

# Remove missing data

newdata <- na.omit(new)

# Correlate

cor=cor(newdata)

# Save as a data.frame

igf1=as.data.frame(cor)

# Makes the metabolite rownames a column in the dataframe 

igf1$metabolites=rownames(igf1)

# Select just the "igf1" and "metabolites" columns

myvars <- c("igf1", "metabolites")
igf1 <- igf1[myvars]

# Order the correlations

igf1 <- igf1[order(igf1$igf1),]

# Remove scientific notation (for ease of viewing)

options(scipen = 999)

# Save file

write.csv(igf1, "C:/Users/ca16591/Dropbox/Bristol/igf1.csv")

Loop for effect of IGF1 on metabolites

### Looping through the regression models of the effects of IGF1 on metabolites using robust standard errors and clustering on 'centre'

vitD_met <- read.csv("C:/Users/ca16591/Dropbox/Bristol/vitD_mets2.csv")
metnames=colnames(vitD_met)[4:230]
colnames(vitD_met)
attach(vitD_met)
as.factor(casecontrol)
as.factor(centre)

logtransform<-function(vitD_mets,metnames)  
{   
  tx=vitD_mets[metnames]    
  tx=apply(tx,2,function(x){if (min(x,na.rm=T)==0) x=x+1 else x })  
  tx=log(tx)    
  vitD_mets[,metnames]=tx   
  vitD_mets
}   

confint.robust <- function(object, parm, level = 0.95, ...)
{
  cf <- coef(object)
  pnames <- names(cf)
  if (missing(parm))
    parm <- pnames
  else if (is.numeric(parm))
    parm <- pnames[parm]
  a <- (1 - level)/2
  a <- c(a, 1 - a)
  pct <- stats:::format.perc(a, 3)
  fac <- qnorm(a)
  ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(parm,
                                                             pct))
  ses <- sqrt(diag(sandwich::vcovHC(object)))[parm]
  ci[] <- cf[parm] + ses %o% fac
  ci
}

linearRegress<-function(metabolite.name,exposure.name,dataset,covariate.name='T',metabolite.log='T',robustSE='T', subset=NULL)  
{   
  ##subseting   
  tx=dataset    
  if(!is.null(subset)) {ind=with(tx,eval(parse(text=subset))); tx=tx[ind,]} 
  
  ##log-transform and scaling the metabolites   
  logname=metabolite.name
  scalename=metabolite.name
  if (metabolite.log=='T') {tx=logtransform(tx,logname)}
  tx[,scalename]=scale(tx[,scalename])
  
  ##linear regression of exposure with metabolites      
  add=numeric() 
  fom=formula(paste('met~',paste(c('igf1', covariate.name=c('casecontrol', cluster = "centre", adjust =T)),collapse='+')))  
  for (j in 1:length(metabolite.name))  
  { 
    met=tx[,metabolite.name[j]];    
    fit=lm(fom,data=tx) 
    if(robustSE=='T'){
      temp=c(nobs(fit),coeftest(fit, vcov =vcovHC(fit,"HC1"))[exposure.name,], confint.robust(fit,exposure.name,level=0.95))
      names(temp)=c('N',names(coeftest(fit, vcov = vcovHC(fit, "HC1"))[exposure.name,]),colnames(confint.robust(fit,exposure.name,level=0.95)))
      
      
    }else{
      temp=c(summary(fit)$coef['igf1',],confint(fit)['igf1',]); 
    }
    
    add=rbind(add,temp);    
  }     
  rownames(add)=metabolite.name 
  add=data.frame(add,check.names = F)   
  add   
}   
result_linear=linearRegress(metnames,'igf1', vitD_met, covariate.name=c('casecontrol', cluster = "centre", adjust =T), metabolite.log='T',robustSE='T') 

result_linear$Metabolite=rownames(result_linear)


myvars=c("Metabolite", "Estimate","Std. Error",  "2.5 %",  "97.5 %", "Pr(>|t|)", "N" )
result_linear=result_linear[myvars]

result_linear <- result_linear[order(result_linear$'Pr(>|t|)'),]

write.csv(result_linear, "C:/Users/ca16591/Dropbox/Bristol/IGF1_on_mets.csv")

Loop for effect of metabolites on IGF1

### Looping through the regression models of the effects metabolites on IGF1 using robust standard errors and clustering on 'centre'

vitD_met <- read.csv("C:/Users/ca16591/Dropbox/Bristol/vitD_mets2.csv")
metnames=colnames(vitD_met)[4:230]
colnames(vitD_met)
attach(vitD_met)
as.factor(casecontrol)
as.factor(centre)

logtransform<-function(vitD_mets,metnames)  
{   
  tx=vitD_mets[metnames]    
  tx=apply(tx,2,function(x){if (min(x,na.rm=T)==0) x=x+1 else x })  
  tx=log(tx)    
  vitD_mets[,metnames]=tx   
  vitD_mets
}   


confint.robust <- function(object, parm, level = 0.95, ...)
{
  cf <- coef(object)
  pnames <- names(cf)
  if (missing(parm))
    parm <- pnames
  else if (is.numeric(parm))
    parm <- pnames[parm]
  a <- (1 - level)/2
  a <- c(a, 1 - a)
  pct <- stats:::format.perc(a, 3)
  fac <- qnorm(a)
  ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(parm,
                                                             pct))
  ses <- sqrt(diag(sandwich::vcovHC(object)))[parm]
  ci[] <- cf[parm] + ses %o% fac
  ci
}

linearRegress<-function(outcome.name,exposure.name,dataset,covariate.name='T',exposure.log='T',robustSE='T', subset=NULL)   
{   
  ##subseting   
  tx=dataset    
  if(!is.null(subset)) {ind=with(tx,eval(parse(text=subset))); tx=tx[ind,]} 
  
  ##log-transform and scaling the metabolites   
  logname=exposure.name
  scalename=exposure.name
  if (exposure.log=='T') {tx=logtransform(tx,logname)}
  tx[,scalename]=scale(tx[,scalename])
  
  ##linear regression of exposure with metabolites      
  add=numeric() 
  fom=formula(paste('igf1~',paste(c('met', covariate.name=c('casecontrol', cluster = "centre", adjust =T)),collapse='+')))  
  for (j in 1:length(exposure.name))    
  { 
    met=tx[,exposure.name[j]];      
    fit=lm(fom,data=tx) 
    if(robustSE=='T'){
      temp=c(nobs(fit),coeftest(fit, vcov =vcovHC(fit,"HC1"))['met',], confint.robust(fit,'met',level=0.95))
      names(temp)=c('N',names(coeftest(fit, vcov = vcovHC(fit, "HC1"))['met',]),colnames(confint.robust(fit,'met',level=0.95)))
      
      
    }else{
      temp=c(summary(fit)$coef['igf1',],confint(fit)['igf1',]); 
    }
    
    add=rbind(add,temp);    
  }     
  rownames(add)=exposure.name   
  add=data.frame(add,check.names = F)   
  add   
}   
result_linear2=linearRegress('igf1', metnames,vitD_met, covariate.name=c('casecontrol'), exposure.log='T',robustSE='T') 
result_linear2$Metabolite=rownames(result_linear)
myvars=c("Metabolite", "Estimate","Std. Error",  "2.5 %",  "97.5 %", "Pr(>|t|)", "N" )
result_linear2=result_linear2[myvars]

result_linear2 <- result_linear2[order(result_linear2$'Pr(>|t|)'),]

write.csv(result_linear2, "C:/Users/ca16591/Dropbox/Bristol/mets_on_IGF1.csv")

Top results for the effect of IGF1 on metabolites

Metabolite Estimate 2.5% 97.5% Pval
S.VLDL.PL.. -0.0020273 -0.0027651 -0.0012895 0.0000001
LDL.D -0.0019311 -0.0027011 -0.0011610 0.0000009
M.HDL.PL -0.0017695 -0.0025360 -0.0010030 0.0000060
Cit -0.0017598 -0.0025806 -0.0009390 0.0000245
M.LDL.TG.. -0.0017346 -0.0025481 -0.0009211 0.0000286
L.HDL.CE -0.0015789 -0.0023249 -0.0008330 0.0000330
ApoB.ApoA1 0.0015864 0.0008333 0.0023395 0.0000358
L.HDL.P -0.0015353 -0.0022761 -0.0007945 0.0000480
XL.HDL.CE.. 0.0016363 0.0008378 0.0024349 0.0000579
L.HDL.L -0.0015191 -0.0022604 -0.0007778 0.0000582
L.HDL.C -0.0015021 -0.0022475 -0.0007567 0.0000769
S.VLDL.FC.. -0.0015133 -0.0022676 -0.0007589 0.0000829
Val 0.0015251 0.0007637 0.0022866 0.0000836
M.HDL.P -0.0015427 -0.0023142 -0.0007712 0.0000874
HDL.D -0.0015029 -0.0022556 -0.0007503 0.0000895
M.HDL.L -0.0015306 -0.0023001 -0.0007611 0.0000953
L.HDL.PL -0.0014590 -0.0021994 -0.0007185 0.0001111
L.LDL.CE.. 0.0016397 0.0008033 0.0024761 0.0001188
FAw3.FA 0.0016614 0.0008057 0.0025172 0.0001327
M.HDL.FC -0.0014223 -0.0021720 -0.0006726 0.0001974
HDL.C -0.0014233 -0.0021887 -0.0006580 0.0002621
HDL2.C -0.0014181 -0.0021819 -0.0006544 0.0002683
L.LDL.TG.. -0.0014778 -0.0022873 -0.0006684 0.0003380
FAw3 0.0015852 0.0007104 0.0024599 0.0003480
S.VLDL.CE 0.0014187 0.0006350 0.0022025 0.0003771
S.VLDL.C 0.0013334 0.0005715 0.0020954 0.0005878
XL.HDL.C.. 0.0014299 0.0006122 0.0022476 0.0005911
L.HDL.PL.. 0.0013495 0.0005629 0.0021362 0.0007472
L.HDL.CE.. -0.0013965 -0.0022119 -0.0005812 0.0007625
M.HDL.C -0.0013095 -0.0020732 -0.0005458 0.0007648
XL.HDL.PL -0.0013600 -0.0021742 -0.0005458 0.0010480
M.HDL.CE -0.0012656 -0.0020302 -0.0005010 0.0011586
S.VLDL.L 0.0012218 0.0004802 0.0019635 0.0012197
L.LDL.C.. 0.0013745 0.0005391 0.0022099 0.0012397
S.HDL.PL -0.0013072 -0.0021039 -0.0005104 0.0012845
S.VLDL.P 0.0012136 0.0004726 0.0019545 0.0013033

Top results for the effect of metabolites on IGF1

Metabolite Estimate 2.5% 97.5% Pval
L.LDL.L -5.790360 -7.870503 -3.710217 0.0000001
PC -5.790417 -8.066264 -3.514570 0.0000006
XXL.VLDL.P -5.300545 -7.605295 -2.995795 0.0000066
S.HDL.CE -5.169202 -7.486564 -2.851841 0.0000122
LDL.C -5.088763 -7.425805 -2.751722 0.0000199
Glol 4.488738 2.400982 6.576494 0.0000214
L.HDL.TG.. 4.532914 2.416942 6.648887 0.0000270
XXL.VLDL.C.. 4.851103 2.521705 7.180501 0.0000440
XS.VLDL.PL.. 4.857421 2.519526 7.195316 0.0000457
M.VLDL.CE.. 4.515785 2.330918 6.700651 0.0000511
S.LDL.L -4.613319 -6.861982 -2.364656 0.0000581
UnSat -4.481701 -6.724264 -2.239139 0.0000898
S.HDL.P -4.462341 -6.706710 -2.217973 0.0000974
S.LDL.CE.. -4.437539 -6.680777 -2.194300 0.0001058
HDL.TG 4.680219 2.295464 7.064973 0.0001192
XL.HDL.FC.. -4.417954 -6.675381 -2.160528 0.0001247
S.HDL.CE.. -4.400558 -6.651665 -2.149451 0.0001273
XL.VLDL.PL -4.327313 -6.580877 -2.073749 0.0001656
Ile -4.466532 -6.820049 -2.113014 0.0001991
M.LDL.CE -4.247624 -6.488351 -2.006897 0.0002024
VLDL.TG -4.439272 -6.788243 -2.090301 0.0002117
S.VLDL.L 4.048434 1.904710 6.192158 0.0002125
XL.HDL.C -4.141385 -6.430267 -1.852504 0.0003867
IDL.PL 4.248820 1.882413 6.615227 0.0004244
M.HDL.CE 3.800365 1.685734 5.914996 0.0004255
bOHBut 3.816502 1.674215 5.958789 0.0004448
MUFA.FA -4.120176 -6.426546 -1.813807 0.0004606
IDL.P -4.095624 -6.414899 -1.776348 0.0005274
XL.VLDL.L -4.074667 -6.417634 -1.731699 0.0006365
VLDL.D 4.008505 1.699263 6.317748 0.0006596
SFA 4.062989 1.666835 6.459142 0.0008796
M.HDL.C 3.488133 1.399149 5.577118 0.0010598
L.HDL.CE.. 3.467553 1.375659 5.559447 0.0011526
L.VLDL.P -3.875232 -6.234874 -1.515590 0.0012552
Crea -3.980178 -6.411055 -1.549301 0.0012573
M.HDL.TG.. -3.874632 -6.233307 -1.515956 0.0012740