This is code for looping through the effects of IGF1 on metabolites and the effects of metabolites on IGF1. Metabolites have been log-transformed and scaled and the linear models adjusted using robust standard errors with clustering by ‘centre’. I included ‘casecontrol’ in the models as a covariate, but only to hold the space in the code so that it can be easily modified. We could run stratified analyses or adjust for any relevant covariates. I did not transform ‘igf1’.
install.packages('lattice', repos='http://cran.r-project.org')
install.packages('readr',repos='http://cran.r-project.org')
install.packages('readxl',repos='http://cran.r-project.org')
install.packages('sandwich',repos='http://cran.r-project.org')
install.packages('lmtest', repos='http://cran.r-project.org')library('lattice')
library('readr')
library('readxl')
library('sandwich')
library('lmtest')# Read-in the data
vitD_met <- read.csv("C:/Users/ca16591/Dropbox/Bristol/vitD_mets2.csv")
metnames=colnames(vitD_met)[4:230]
colnames(vitD_met)
# Log-transform and scale the metabolites
logtransform<-function(vitD_mets,metnames)
{
tx=vitD_mets[metnames]
tx=apply(tx,2,function(x){if (min(x,na.rm=T)==0) x=x+1 else x })
tx=log(tx)
vitD_mets[,metnames]=tx
vitD_mets
}
mets_logged=logtransform(vitD_met,metnames)
mets_logged.scaled=mets_logged
mets_logged.scaled[,metnames]=scale(mets_logged.scaled[,metnames])
# Select just the metabolites and IGF1 variable
new=data.frame(mets_logged.scaled[-c(231:331)], mets_logged.scaled$igf1,mets_logged.scaled$igf2)
new=new[-c(237:263)]
new=new[-c(232:236)]
new=new[-c(1:2)]
colnames(new)
# Remove missing data
newdata <- na.omit(new)
# Correlate
cor=cor(newdata)
# Save as a data.frame
igf1=as.data.frame(cor)
# Makes the metabolite rownames a column in the dataframe
igf1$metabolites=rownames(igf1)
# Select just the "igf1" and "metabolites" columns
myvars <- c("igf1", "metabolites")
igf1 <- igf1[myvars]
# Order the correlations
igf1 <- igf1[order(igf1$igf1),]
# Remove scientific notation (for ease of viewing)
options(scipen = 999)
# Save file
write.csv(igf1, "C:/Users/ca16591/Dropbox/Bristol/igf1.csv")### Looping through the regression models of the effects of IGF1 on metabolites using robust standard errors and clustering on 'centre'
vitD_met <- read.csv("C:/Users/ca16591/Dropbox/Bristol/vitD_mets2.csv")
metnames=colnames(vitD_met)[4:230]
colnames(vitD_met)
attach(vitD_met)
as.factor(casecontrol)
as.factor(centre)
logtransform<-function(vitD_mets,metnames)
{
tx=vitD_mets[metnames]
tx=apply(tx,2,function(x){if (min(x,na.rm=T)==0) x=x+1 else x })
tx=log(tx)
vitD_mets[,metnames]=tx
vitD_mets
}
confint.robust <- function(object, parm, level = 0.95, ...)
{
cf <- coef(object)
pnames <- names(cf)
if (missing(parm))
parm <- pnames
else if (is.numeric(parm))
parm <- pnames[parm]
a <- (1 - level)/2
a <- c(a, 1 - a)
pct <- stats:::format.perc(a, 3)
fac <- qnorm(a)
ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(parm,
pct))
ses <- sqrt(diag(sandwich::vcovHC(object)))[parm]
ci[] <- cf[parm] + ses %o% fac
ci
}
linearRegress<-function(metabolite.name,exposure.name,dataset,covariate.name='T',metabolite.log='T',robustSE='T', subset=NULL)
{
##subseting
tx=dataset
if(!is.null(subset)) {ind=with(tx,eval(parse(text=subset))); tx=tx[ind,]}
##log-transform and scaling the metabolites
logname=metabolite.name
scalename=metabolite.name
if (metabolite.log=='T') {tx=logtransform(tx,logname)}
tx[,scalename]=scale(tx[,scalename])
##linear regression of exposure with metabolites
add=numeric()
fom=formula(paste('met~',paste(c('igf1', covariate.name=c('casecontrol', cluster = "centre", adjust =T)),collapse='+')))
for (j in 1:length(metabolite.name))
{
met=tx[,metabolite.name[j]];
fit=lm(fom,data=tx)
if(robustSE=='T'){
temp=c(nobs(fit),coeftest(fit, vcov =vcovHC(fit,"HC1"))[exposure.name,], confint.robust(fit,exposure.name,level=0.95))
names(temp)=c('N',names(coeftest(fit, vcov = vcovHC(fit, "HC1"))[exposure.name,]),colnames(confint.robust(fit,exposure.name,level=0.95)))
}else{
temp=c(summary(fit)$coef['igf1',],confint(fit)['igf1',]);
}
add=rbind(add,temp);
}
rownames(add)=metabolite.name
add=data.frame(add,check.names = F)
add
}
result_linear=linearRegress(metnames,'igf1', vitD_met, covariate.name=c('casecontrol', cluster = "centre", adjust =T), metabolite.log='T',robustSE='T')
result_linear$Metabolite=rownames(result_linear)
myvars=c("Metabolite", "Estimate","Std. Error", "2.5 %", "97.5 %", "Pr(>|t|)", "N" )
result_linear=result_linear[myvars]
result_linear <- result_linear[order(result_linear$'Pr(>|t|)'),]
write.csv(result_linear, "C:/Users/ca16591/Dropbox/Bristol/IGF1_on_mets.csv")### Looping through the regression models of the effects metabolites on IGF1 using robust standard errors and clustering on 'centre'
vitD_met <- read.csv("C:/Users/ca16591/Dropbox/Bristol/vitD_mets2.csv")
metnames=colnames(vitD_met)[4:230]
colnames(vitD_met)
attach(vitD_met)
as.factor(casecontrol)
as.factor(centre)
logtransform<-function(vitD_mets,metnames)
{
tx=vitD_mets[metnames]
tx=apply(tx,2,function(x){if (min(x,na.rm=T)==0) x=x+1 else x })
tx=log(tx)
vitD_mets[,metnames]=tx
vitD_mets
}
confint.robust <- function(object, parm, level = 0.95, ...)
{
cf <- coef(object)
pnames <- names(cf)
if (missing(parm))
parm <- pnames
else if (is.numeric(parm))
parm <- pnames[parm]
a <- (1 - level)/2
a <- c(a, 1 - a)
pct <- stats:::format.perc(a, 3)
fac <- qnorm(a)
ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(parm,
pct))
ses <- sqrt(diag(sandwich::vcovHC(object)))[parm]
ci[] <- cf[parm] + ses %o% fac
ci
}
linearRegress<-function(outcome.name,exposure.name,dataset,covariate.name='T',exposure.log='T',robustSE='T', subset=NULL)
{
##subseting
tx=dataset
if(!is.null(subset)) {ind=with(tx,eval(parse(text=subset))); tx=tx[ind,]}
##log-transform and scaling the metabolites
logname=exposure.name
scalename=exposure.name
if (exposure.log=='T') {tx=logtransform(tx,logname)}
tx[,scalename]=scale(tx[,scalename])
##linear regression of exposure with metabolites
add=numeric()
fom=formula(paste('igf1~',paste(c('met', covariate.name=c('casecontrol', cluster = "centre", adjust =T)),collapse='+')))
for (j in 1:length(exposure.name))
{
met=tx[,exposure.name[j]];
fit=lm(fom,data=tx)
if(robustSE=='T'){
temp=c(nobs(fit),coeftest(fit, vcov =vcovHC(fit,"HC1"))['met',], confint.robust(fit,'met',level=0.95))
names(temp)=c('N',names(coeftest(fit, vcov = vcovHC(fit, "HC1"))['met',]),colnames(confint.robust(fit,'met',level=0.95)))
}else{
temp=c(summary(fit)$coef['igf1',],confint(fit)['igf1',]);
}
add=rbind(add,temp);
}
rownames(add)=exposure.name
add=data.frame(add,check.names = F)
add
}
result_linear2=linearRegress('igf1', metnames,vitD_met, covariate.name=c('casecontrol'), exposure.log='T',robustSE='T')
result_linear2$Metabolite=rownames(result_linear)
myvars=c("Metabolite", "Estimate","Std. Error", "2.5 %", "97.5 %", "Pr(>|t|)", "N" )
result_linear2=result_linear2[myvars]
result_linear2 <- result_linear2[order(result_linear2$'Pr(>|t|)'),]
write.csv(result_linear2, "C:/Users/ca16591/Dropbox/Bristol/mets_on_IGF1.csv")| Metabolite | Estimate | 2.5% | 97.5% | Pval |
|---|---|---|---|---|
| S.VLDL.PL.. | -0.0020273 | -0.0027651 | -0.0012895 | 0.0000001 |
| LDL.D | -0.0019311 | -0.0027011 | -0.0011610 | 0.0000009 |
| M.HDL.PL | -0.0017695 | -0.0025360 | -0.0010030 | 0.0000060 |
| Cit | -0.0017598 | -0.0025806 | -0.0009390 | 0.0000245 |
| M.LDL.TG.. | -0.0017346 | -0.0025481 | -0.0009211 | 0.0000286 |
| L.HDL.CE | -0.0015789 | -0.0023249 | -0.0008330 | 0.0000330 |
| ApoB.ApoA1 | 0.0015864 | 0.0008333 | 0.0023395 | 0.0000358 |
| L.HDL.P | -0.0015353 | -0.0022761 | -0.0007945 | 0.0000480 |
| XL.HDL.CE.. | 0.0016363 | 0.0008378 | 0.0024349 | 0.0000579 |
| L.HDL.L | -0.0015191 | -0.0022604 | -0.0007778 | 0.0000582 |
| L.HDL.C | -0.0015021 | -0.0022475 | -0.0007567 | 0.0000769 |
| S.VLDL.FC.. | -0.0015133 | -0.0022676 | -0.0007589 | 0.0000829 |
| Val | 0.0015251 | 0.0007637 | 0.0022866 | 0.0000836 |
| M.HDL.P | -0.0015427 | -0.0023142 | -0.0007712 | 0.0000874 |
| HDL.D | -0.0015029 | -0.0022556 | -0.0007503 | 0.0000895 |
| M.HDL.L | -0.0015306 | -0.0023001 | -0.0007611 | 0.0000953 |
| L.HDL.PL | -0.0014590 | -0.0021994 | -0.0007185 | 0.0001111 |
| L.LDL.CE.. | 0.0016397 | 0.0008033 | 0.0024761 | 0.0001188 |
| FAw3.FA | 0.0016614 | 0.0008057 | 0.0025172 | 0.0001327 |
| M.HDL.FC | -0.0014223 | -0.0021720 | -0.0006726 | 0.0001974 |
| HDL.C | -0.0014233 | -0.0021887 | -0.0006580 | 0.0002621 |
| HDL2.C | -0.0014181 | -0.0021819 | -0.0006544 | 0.0002683 |
| L.LDL.TG.. | -0.0014778 | -0.0022873 | -0.0006684 | 0.0003380 |
| FAw3 | 0.0015852 | 0.0007104 | 0.0024599 | 0.0003480 |
| S.VLDL.CE | 0.0014187 | 0.0006350 | 0.0022025 | 0.0003771 |
| S.VLDL.C | 0.0013334 | 0.0005715 | 0.0020954 | 0.0005878 |
| XL.HDL.C.. | 0.0014299 | 0.0006122 | 0.0022476 | 0.0005911 |
| L.HDL.PL.. | 0.0013495 | 0.0005629 | 0.0021362 | 0.0007472 |
| L.HDL.CE.. | -0.0013965 | -0.0022119 | -0.0005812 | 0.0007625 |
| M.HDL.C | -0.0013095 | -0.0020732 | -0.0005458 | 0.0007648 |
| XL.HDL.PL | -0.0013600 | -0.0021742 | -0.0005458 | 0.0010480 |
| M.HDL.CE | -0.0012656 | -0.0020302 | -0.0005010 | 0.0011586 |
| S.VLDL.L | 0.0012218 | 0.0004802 | 0.0019635 | 0.0012197 |
| L.LDL.C.. | 0.0013745 | 0.0005391 | 0.0022099 | 0.0012397 |
| S.HDL.PL | -0.0013072 | -0.0021039 | -0.0005104 | 0.0012845 |
| S.VLDL.P | 0.0012136 | 0.0004726 | 0.0019545 | 0.0013033 |
| Metabolite | Estimate | 2.5% | 97.5% | Pval |
|---|---|---|---|---|
| L.LDL.L | -5.790360 | -7.870503 | -3.710217 | 0.0000001 |
| PC | -5.790417 | -8.066264 | -3.514570 | 0.0000006 |
| XXL.VLDL.P | -5.300545 | -7.605295 | -2.995795 | 0.0000066 |
| S.HDL.CE | -5.169202 | -7.486564 | -2.851841 | 0.0000122 |
| LDL.C | -5.088763 | -7.425805 | -2.751722 | 0.0000199 |
| Glol | 4.488738 | 2.400982 | 6.576494 | 0.0000214 |
| L.HDL.TG.. | 4.532914 | 2.416942 | 6.648887 | 0.0000270 |
| XXL.VLDL.C.. | 4.851103 | 2.521705 | 7.180501 | 0.0000440 |
| XS.VLDL.PL.. | 4.857421 | 2.519526 | 7.195316 | 0.0000457 |
| M.VLDL.CE.. | 4.515785 | 2.330918 | 6.700651 | 0.0000511 |
| S.LDL.L | -4.613319 | -6.861982 | -2.364656 | 0.0000581 |
| UnSat | -4.481701 | -6.724264 | -2.239139 | 0.0000898 |
| S.HDL.P | -4.462341 | -6.706710 | -2.217973 | 0.0000974 |
| S.LDL.CE.. | -4.437539 | -6.680777 | -2.194300 | 0.0001058 |
| HDL.TG | 4.680219 | 2.295464 | 7.064973 | 0.0001192 |
| XL.HDL.FC.. | -4.417954 | -6.675381 | -2.160528 | 0.0001247 |
| S.HDL.CE.. | -4.400558 | -6.651665 | -2.149451 | 0.0001273 |
| XL.VLDL.PL | -4.327313 | -6.580877 | -2.073749 | 0.0001656 |
| Ile | -4.466532 | -6.820049 | -2.113014 | 0.0001991 |
| M.LDL.CE | -4.247624 | -6.488351 | -2.006897 | 0.0002024 |
| VLDL.TG | -4.439272 | -6.788243 | -2.090301 | 0.0002117 |
| S.VLDL.L | 4.048434 | 1.904710 | 6.192158 | 0.0002125 |
| XL.HDL.C | -4.141385 | -6.430267 | -1.852504 | 0.0003867 |
| IDL.PL | 4.248820 | 1.882413 | 6.615227 | 0.0004244 |
| M.HDL.CE | 3.800365 | 1.685734 | 5.914996 | 0.0004255 |
| bOHBut | 3.816502 | 1.674215 | 5.958789 | 0.0004448 |
| MUFA.FA | -4.120176 | -6.426546 | -1.813807 | 0.0004606 |
| IDL.P | -4.095624 | -6.414899 | -1.776348 | 0.0005274 |
| XL.VLDL.L | -4.074667 | -6.417634 | -1.731699 | 0.0006365 |
| VLDL.D | 4.008505 | 1.699263 | 6.317748 | 0.0006596 |
| SFA | 4.062989 | 1.666835 | 6.459142 | 0.0008796 |
| M.HDL.C | 3.488133 | 1.399149 | 5.577118 | 0.0010598 |
| L.HDL.CE.. | 3.467553 | 1.375659 | 5.559447 | 0.0011526 |
| L.VLDL.P | -3.875232 | -6.234874 | -1.515590 | 0.0012552 |
| Crea | -3.980178 | -6.411055 | -1.549301 | 0.0012573 |
| M.HDL.TG.. | -3.874632 | -6.233307 | -1.515956 | 0.0012740 |