Lifespan…
# View hidden files for editing .bashrc
ls -la ~/ | more
rm -rf .git
# Set environment
mkdir -p ~/metab_longevity/lifespan/
mkdir -p ~/metab_longevity/lifespan/results
export lifespan_HOME=~/metab_longevity/lifespan/
export lifespan_HOME_results=~/metab_longevity/lifespan/results
mkdir -p ~/metab_longevity/healthspan/
mkdir -p ~/metab_longevity/healthspan/results
export healthspan_HOME=~/metab_longevity/healthspan/
export healthspan_HOME_results=~/metab_longevity/healthspan/results
mkdir -p ~/metab_longevity/extreme_longevity/
mkdir -p ~/metab_longevity/extreme_longevity/results
export extreme_longevity_HOME=~/metab_longevity/extreme_longevity/
export extreme_longevity_HOME_results=~/metab_longevity/extreme_longevity/results
# Open a terminal window and add the environmental variables into the .bashrc file with the nano editor
nano ~/.bashrc
# Save the .bashrc, exit nano, and source it
source ~/.bashrc
# Test that the envrionmental variables worked
echo $lifespan_HOME
echo $extreme_longevity_HOME_results
# Check to see if they worked
env | grep lifespan_HOME
env | grep healthspan_HOME
env | grep extreme_longevity
# Getting the extreme longevity GWASs from wget
wget --recursive --no-parent -nd http://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST008001-GCST009000/GCST008598/Results_90th_percentile.txt.gz
wget --recursive --no-parent -nd http://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST008001-GCST009000/GCST008599/Results_99th_percentile.txt.gz
gzip -dk Results_90th_percentile.txt.gz
gzip -dk Results_99th_percentile.txt.gz
# Clean up folders
mkdir /n/home04/cdadams/metab_longevity/extreme_longevity99
mkdir /n/home04/cdadams/metab_longevity/extreme_longevity99/results
cd /n/home04/cdadams/metab_longevity/extreme_longevity
mv Results_99th_percentile.txt.gz /n/home04/cdadams/metab_longevity/extreme_longevity99
mv Results_99th_percentile.txt /n/home04/cdadams/metab_longevity/extreme_longevity99
# Get html reports and figures under results
mv *.html /n/home04/cdadams/metab_longevity/lifespan/results
mv *mr_report.md /n/home04/cdadams/metab_longevity/lifespan/results
mv index.html /n/home04/cdadams/metab_longevity/extreme_longevity
cd /n/home04/cdadams/metab_longevity/extreme_longevity
mv figure /n/home04/cdadams/metab_longevity/lifespan/results
mv README.md /n/home04/cdadams/metab_longevity/lifespan
cd /n/home04/cdadams/metab_longevity/lifespan/results
mkdir /n/home04/cdadams/metab_longevity/lifespan/results/reports
mv *.html /n/home04/cdadams/metab_longevity/lifespan/results/reports
cd /n/home04/cdadams/metab_longevity/healthspan/
mv figure /n/home04/cdadams/metab_longevity/healthspan/results/reports
mkdir /n/home04/cdadams/metab_longevity/healthspan/results/reports
mv *.html /n/home04/cdadams/metab_longevity/healthspan/results/reports
mkdir /n/home04/cdadams/metab_longevity/healthspan/results/reports/negation
mv *.html /n/home04/cdadams/metab_longevity/healthspan/results/reports/negation
mv figure /n/home04/cdadams/metab_longevity/healthspan/results/reports/negation
cd /n/home04/cdadams/metab_longevity/extreme_longevity/
mv figure /n/home04/cdadams/metab_longevity/extreme_longevity/results/reports
mkdir /n/home04/cdadams/metab_longevity/extreme_longevity/results/reports
mv *.html /n/home04/cdadams/metab_longevity/extreme_longevity/results/reports
cd /n/home04/cdadams/metab_longevity/extreme_longevity99/
mv figure /n/home04/cdadams/metab_longevity/extreme_longevity99/results/reports
mkdir /n/home04/cdadams/metab_longevity/extreme_longevity99/results/reports
mv *.html /n/home04/cdadams/metab_longevity/extreme_longevity99/results/reports
less -s Results_90th_percentile.txt
# Consider moving this analysis to:
cd /n/holylfs/LABS/lemos_lab/Users/cdadams
# Set up private Git resp on cdadams-harvard
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
git init
echo "## MR of circulating metabolites on four measures of longevity
## Rupbs: https://rpubs.com/Charleen_D_Adams/767266
<hr style=border:2px solid green> </hr>">>README.md
git add README.md
git commit -m "README for metabolites on longevity"
git branch -M main
git remote add origin https://github.com/cdadams-harvard/metab_longevity.git
git branch -M main
git add -A
git commit -m "all initial subdirectories and results for (parental) lifespan"
git push --all
The files for the (parental) lifespan data are available at: https://datashare.ed.ac.uk/handle/10283/3209
They come with the following README:
This dataset contains HRC-imputed GWAS summary statistics on parental survival, under the assumption of common effect sizes
between fathers and mothers.
rsid - reference SNP cluster ID;
snpid - SNP chromosome and position ID;
chr - Chromosome;
pos - Base-pair position on chromosome (GRCh37);
a1 - the effect allele;
a0 - the reference allele;
n - Number of phenotypes analysed for the SNP. Note that combined parental residuals from UK Biobank are counted once,
and LifeGen meta-analysed father and mother statistics are counted once each, and as a result, the total number of
phenotypes analysed is actually greater than the number listed.
freq1 - frequency of the effect allele;
beta1 - log hazard protection ratio (negation of log hazard ratio) for carrying one copy of the a1 allele in self;
se - Standard Error;
p - the P value for the Wald test of association between imputed dosage and cox model residual;
direction - direction of effect in UK Biobank and LifeGen;
info - imputation info score;
freq_se - standard error of effect allele frequency between UK Biobank and LifeGen;
min_freq1 - minimum effect allele frequency across UK Biobank and LifeGen;
max_freq1 - maximum effect allele frequency across UK Biobank and LifeGen.
The healthspan GWAS comes from https://zenodo.org/record/1302861#.YJh3rWYpC3I
The dataset contains genome-wide association summary statistics computed for heathspan. The UKB sub-population of 300,447 genetically Caucasian, British individuals were analyzed.
Column headers:
SNPID - SNP rsID
chr - chromosome
pos - position (GRCh37 build / hg19)
EA - effective allele (coded as "1")
RA - reference allele (coded as "0")
EAF - effective allele frequency
beta - effect size of effective allele
se - standard error of effect size
Z - Z-value of association
-log10(p-value) - minus log10(P-value) of association
The extreme longevity GWASs comes from https://www.nature.com/articles/s41467-019-11558-2.
[W]e perform two meta-analyses of GWA studies of a rigorous longevity phenotype definition including 11,262/3484 cases surviving at or beyond the age corresponding to the 90th/99th survival percentile, respectively, and 25,483 controls whose age at death or at last contact was at or below the age corresponding to the 60th survival percentile.
SNP: SNP-ID based on the EasyQC output
Chr: Chromosome based on Genome Reference Consortium Human Build 37 (GRCh37)
Position: Position based on Genome Reference Consortium Human Build 37 (GRCh37)
EA: Effect allele
NEA: Non-effect allele
EAF: Effect allele frequency
Beta: LN(Odds Ratio) based on the effect allele
SE: Standard error belonging to the LN(Odds Ratio)
P-value: P-value
Effective_N: Total number of samples included in the analysis of the SNP (based on the effective N)
rsID: SNP-ID based on the IMPUTE v2 1000 Genomes Phase I reference panel legend file
setwd("/n/home04/cdadams/metab_longevity/extreme_longevity")
longevity_90=read.table('/n/home04/cdadams/metab_longevity/extreme_longevity/Results_90th_percentile.txt', header = TRUE, sep = "\t", dec = ".")
# Fetch the rsids
ch1_snps <- getSNPlocs("ch1", as.GRanges=TRUE)
ch1=longevity_90[which(longevity_90$Chr=="1"),]
mypos <- ch1$Position
idx <- match(mypos, start(ch1_snps))
rsids1 <- mcols(ch1_snps)$RefSNP_id[idx]
ch1$rsid=rsids1
ch1$rsid_rs=paste0("rs", ch1$rsid)
ch1.t = data.table(ch1, key="SNP")
ch2_snps <- getSNPlocs("ch2", as.GRanges=TRUE)
ch2=longevity_90[which(longevity_90$Chr=="2"),]
mypos <- ch2$Position
idx <- match(mypos, start(ch2_snps))
rsids2 <- mcols(ch2_snps)$RefSNP_id[idx]
ch2$rsid=rsids2
ch2$rsid_rs=paste0("rs", ch2$rsid)
ch2.t = data.table(ch2, key="SNP")
ch3_snps <- getSNPlocs("ch3", as.GRanges=TRUE)
ch3=longevity_90[which(longevity_90$Chr=="3"),]
mypos <- ch3$Position
idx <- match(mypos, start(ch3_snps))
rsids3 <- mcols(ch3_snps)$RefSNP_id[idx]
ch3$rsid=rsids3
ch3$rsid_rs=paste0("rs", ch3$rsid)
ch3.t = data.table(ch3, key="SNP")
ch4_snps <- getSNPlocs("ch4", as.GRanges=TRUE)
ch4=longevity_90[which(longevity_90$Chr=="4"),]
mypos <- ch4$Position
idx <- match(mypos, start(ch4_snps))
rsids4 <- mcols(ch4_snps)$RefSNP_id[idx]
ch4$rsid=rsids4
ch4$rsid_rs=paste0("rs", ch4$rsid)
ch4.t = data.table(ch4, key="SNP")
ch5_snps <- getSNPlocs("ch5", as.GRanges=TRUE)
ch5=longevity_90[which(longevity_90$Chr=="5"),]
mypos <- ch5$Position
idx <- match(mypos, start(ch5_snps))
rsids5 <- mcols(ch5_snps)$RefSNP_id[idx]
ch5$rsid=rsids5
ch5$rsid_rs=paste0("rs", ch5$rsid)
ch5.t = data.table(ch5, key="SNP")
ch6_snps <- getSNPlocs("ch6", as.GRanges=TRUE)
ch6=longevity_90[which(longevity_90$Chr=="6"),]
mypos <- ch6$Position
idx <- match(mypos, start(ch6_snps))
rsids6 <- mcols(ch6_snps)$RefSNP_id[idx]
ch6$rsid=rsids6
ch6$rsid_rs=paste0("rs", ch6$rsid)
ch6.t = data.table(ch6, key="SNP")
ch7_snps <- getSNPlocs("ch7", as.GRanges=TRUE)
ch7=longevity_90[which(longevity_90$Chr=="7"),]
mypos <- ch7$Position
idx <- match(mypos, start(ch7_snps))
rsids7 <- mcols(ch7_snps)$RefSNP_id[idx]
ch7$rsid=rsids7
ch7$rsid_rs=paste0("rs", ch7$rsid)
ch7.t = data.table(ch7, key="SNP")
ch8_snps <- getSNPlocs("ch8", as.GRanges=TRUE)
ch8=longevity_90[which(longevity_90$Chr=="8"),]
mypos <- ch8$Position
idx <- match(mypos, start(ch8_snps))
rsids8 <- mcols(ch8_snps)$RefSNP_id[idx]
ch8$rsid=rsids8
ch8$rsid_rs=paste0("rs", ch8$rsid)
ch8.t = data.table(ch8, key="SNP")
ch9_snps <- getSNPlocs("ch9", as.GRanges=TRUE)
ch9=longevity_90[which(longevity_90$Chr=="9"),]
mypos <- ch9$Position
idx <- match(mypos, start(ch9_snps))
rsids9 <- mcols(ch9_snps)$RefSNP_id[idx]
ch9$rsid=rsids9
ch9$rsid_rs=paste0("rs", ch9$rsid)
ch9.t = data.table(ch9, key="SNP")
ch10_snps <- getSNPlocs("ch10", as.GRanges=TRUE)
ch10=longevity_90[which(longevity_90$Chr=="10"),]
mypos <- ch10$Position
idx <- match(mypos, start(ch10_snps))
rsids10 <- mcols(ch10_snps)$RefSNP_id[idx]
ch10$rsid=rsids10
ch10$rsid_rs=paste0("rs", ch10$rsid)
ch10.t = data.table(ch10, key="SNP")
ch11_snps <- getSNPlocs("ch11", as.GRanges=TRUE)
ch11=longevity_90[which(longevity_90$Chr=="11"),]
mypos <- ch11$Position
idx <- match(mypos, start(ch11_snps))
rsids11 <- mcols(ch11_snps)$RefSNP_id[idx]
ch11$rsid=rsids11
ch11$rsid_rs=paste0("rs", ch11$rsid)
ch11.t = data.table(ch11, key="SNP")
ch12_snps <- getSNPlocs("ch12", as.GRanges=TRUE)
ch12=longevity_90[which(longevity_90$Chr=="12"),]
mypos <- ch12$Position
idx <- match(mypos, start(ch12_snps))
rsids12 <- mcols(ch12_snps)$RefSNP_id[idx]
ch12$rsid=rsids12
ch12$rsid_rs=paste0("rs", ch12$rsid)
ch12.t = data.table(ch12, key="SNP")
ch13_snps <- getSNPlocs("ch13", as.GRanges=TRUE)
ch13=longevity_90[which(longevity_90$Chr=="13"),]
mypos <- ch13$Position
idx <- match(mypos, start(ch13_snps))
rsids13 <- mcols(ch13_snps)$RefSNP_id[idx]
ch13$rsid=rsids13
ch13$rsid_rs=paste0("rs", ch13$rsid)
ch13.t = data.table(ch13, key="SNP")
ch14_snps <- getSNPlocs("ch14", as.GRanges=TRUE)
ch14=longevity_90[which(longevity_90$Chr=="14"),]
mypos <- ch14$Position
idx <- match(mypos, start(ch14_snps))
rsids14 <- mcols(ch14_snps)$RefSNP_id[idx]
ch14$rsid=rsids14
ch14$rsid_rs=paste0("rs", ch14$rsid)
ch14.t = data.table(ch14, key="SNP")
ch15_snps <- getSNPlocs("ch15", as.GRanges=TRUE)
ch15=longevity_90[which(longevity_90$Chr=="15"),]
mypos <- ch15$Position
idx <- match(mypos, start(ch15_snps))
rsids15 <- mcols(ch15_snps)$RefSNP_id[idx]
ch15$rsid=rsids15
ch15$rsid_rs=paste0("rs", ch15$rsid)
ch15.t = data.table(ch15, key="SNP")
ch16_snps <- getSNPlocs("ch16", as.GRanges=TRUE)
ch16=longevity_90[which(longevity_90$Chr=="16"),]
mypos <- ch16$Position
idx <- match(mypos, start(ch16_snps))
rsids16 <- mcols(ch16_snps)$RefSNP_id[idx]
ch16$rsid=rsids16
ch16$rsid_rs=paste0("rs", ch16$rsid)
ch16.t = data.table(ch16, key="SNP")
ch17_snps <- getSNPlocs("ch17", as.GRanges=TRUE)
ch17=longevity_90[which(longevity_90$Chr=="17"),]
mypos <- ch17$Position
idx <- match(mypos, start(ch17_snps))
rsids17 <- mcols(ch17_snps)$RefSNP_id[idx]
ch17$rsid=rsids17
ch17$rsid_rs=paste0("rs", ch17$rsid)
ch17.t = data.table(ch17, key="SNP")
ch18_snps <- getSNPlocs("ch18", as.GRanges=TRUE)
ch18=longevity_90[which(longevity_90$Chr=="18"),]
mypos <- ch18$Position
idx <- match(mypos, start(ch18_snps))
rsids18 <- mcols(ch18_snps)$RefSNP_id[idx]
ch18$rsid=rsids18
ch18$rsid_rs=paste0("rs", ch18$rsid)
ch18.t = data.table(ch18, key="SNP")
ch19_snps <- getSNPlocs("ch19", as.GRanges=TRUE)
ch19=longevity_90[which(longevity_90$Chr=="19"),]
mypos <- ch19$Position
idx <- match(mypos, start(ch19_snps))
rsids19 <- mcols(ch19_snps)$RefSNP_id[idx]
ch19$rsid=rsids19
ch19$rsid_rs=paste0("rs", ch19$rsid)
ch19.t = data.table(ch19, key="SNP")
ch20_snps <- getSNPlocs("ch20", as.GRanges=TRUE)
ch20=longevity_90[which(longevity_90$Chr=="20"),]
mypos <- ch20$Position
idx <- match(mypos, start(ch20_snps))
rsids20 <- mcols(ch20_snps)$RefSNP_id[idx]
ch20$rsid=rsids20
ch20$rsid_rs=paste0("rs", ch20$rsid)
ch20.t = data.table(ch20, key="SNP")
ch21_snps <- getSNPlocs("ch21", as.GRanges=TRUE)
ch21=longevity_90[which(longevity_90$Chr=="21"),]
mypos <- ch21$Position
idx <- match(mypos, start(ch21_snps))
rsids21 <- mcols(ch21_snps)$RefSNP_id[idx]
ch21$rsid=rsids21
ch21$rsid_rs=paste0("rs", ch21$rsid)
ch21.t = data.table(ch21, key="SNP")
ch22_snps <- getSNPlocs("ch22", as.GRanges=TRUE)
ch22=longevity_90[which(longevity_90$Chr=="22"),]
mypos <- ch22$Position
idx <- match(mypos, start(ch22_snps))
rsids22 <- mcols(ch22_snps)$RefSNP_id[idx]
ch22$rsid=rsids22
ch22$rsid_rs=paste0("rs", ch22$rsid)
ch22.t = data.table(ch22, key="SNP")
# by=c("SNP","Chr","Position","EA","NEA",
# "EAF","Beta","SE","P.value","Effective_N",
# "rsid","rsid_rs")
rm(list=ls()[! ls() %in% c("ch1.t","ch2.t","ch3.t","ch4.t","ch5.t","ch6.t","ch7.t",
"ch8.t","ch9.t","ch10.t",
"ch11.t","ch12.t","ch13.t",
"ch14.t","ch15.t","ch16.t","ch17.t","ch18.t",
"ch19.t","ch20.t","ch21.t","ch22.t")])
# Merge together and remove NAs
merged <- rbind(ch1.t,ch2.t,ch3.t,ch4.t,ch5.t,ch6.t,ch7.t,ch8.t,
ch9.t,ch10.t,ch11.t,ch12.t,ch13.t,ch14.t,
ch15.t,ch16.t,ch17.t,ch18.t,ch19.t,ch20.t,ch21.t,ch22.t)
rm(list=ls()[! ls() %in% c("merged")])
dim(merged)
write.csv(merged, 'longevity_90_rsid.csv')
long_clean <- na.omit(merged)
dim(long_clean)
write.csv(long_clean, 'NA_gone_longevity_90_rsid.csv')
setwd("/n/home04/cdadams/metab_longevity/extreme_longevity99")
longevity_99=read.table('/n/home04/cdadams/metab_longevity/extreme_longevity99/Results_99th_percentile.txt', header = TRUE, sep = "\t", dec = ".")
# Fetch the rsids
ch1_snps <- getSNPlocs("ch1", as.GRanges=TRUE)
ch1=longevity_99[which(longevity_99$Chr=="1"),]
mypos <- ch1$Position
idx <- match(mypos, start(ch1_snps))
rsids1 <- mcols(ch1_snps)$RefSNP_id[idx]
ch1$rsid=rsids1
ch1$rsid_rs=paste0("rs", ch1$rsid)
ch1.t = data.table(ch1, key="SNP")
ch2_snps <- getSNPlocs("ch2", as.GRanges=TRUE)
ch2=longevity_99[which(longevity_99$Chr=="2"),]
mypos <- ch2$Position
idx <- match(mypos, start(ch2_snps))
rsids2 <- mcols(ch2_snps)$RefSNP_id[idx]
ch2$rsid=rsids2
ch2$rsid_rs=paste0("rs", ch2$rsid)
ch2.t = data.table(ch2, key="SNP")
ch3_snps <- getSNPlocs("ch3", as.GRanges=TRUE)
ch3=longevity_99[which(longevity_99$Chr=="3"),]
mypos <- ch3$Position
idx <- match(mypos, start(ch3_snps))
rsids3 <- mcols(ch3_snps)$RefSNP_id[idx]
ch3$rsid=rsids3
ch3$rsid_rs=paste0("rs", ch3$rsid)
ch3.t = data.table(ch3, key="SNP")
ch4_snps <- getSNPlocs("ch4", as.GRanges=TRUE)
ch4=longevity_99[which(longevity_99$Chr=="4"),]
mypos <- ch4$Position
idx <- match(mypos, start(ch4_snps))
rsids4 <- mcols(ch4_snps)$RefSNP_id[idx]
ch4$rsid=rsids4
ch4$rsid_rs=paste0("rs", ch4$rsid)
ch4.t = data.table(ch4, key="SNP")
ch5_snps <- getSNPlocs("ch5", as.GRanges=TRUE)
ch5=longevity_99[which(longevity_99$Chr=="5"),]
mypos <- ch5$Position
idx <- match(mypos, start(ch5_snps))
rsids5 <- mcols(ch5_snps)$RefSNP_id[idx]
ch5$rsid=rsids5
ch5$rsid_rs=paste0("rs", ch5$rsid)
ch5.t = data.table(ch5, key="SNP")
ch6_snps <- getSNPlocs("ch6", as.GRanges=TRUE)
ch6=longevity_99[which(longevity_99$Chr=="6"),]
mypos <- ch6$Position
idx <- match(mypos, start(ch6_snps))
rsids6 <- mcols(ch6_snps)$RefSNP_id[idx]
ch6$rsid=rsids6
ch6$rsid_rs=paste0("rs", ch6$rsid)
ch6.t = data.table(ch6, key="SNP")
ch7_snps <- getSNPlocs("ch7", as.GRanges=TRUE)
ch7=longevity_99[which(longevity_99$Chr=="7"),]
mypos <- ch7$Position
idx <- match(mypos, start(ch7_snps))
rsids7 <- mcols(ch7_snps)$RefSNP_id[idx]
ch7$rsid=rsids7
ch7$rsid_rs=paste0("rs", ch7$rsid)
ch7.t = data.table(ch7, key="SNP")
ch8_snps <- getSNPlocs("ch8", as.GRanges=TRUE)
ch8=longevity_99[which(longevity_99$Chr=="8"),]
mypos <- ch8$Position
idx <- match(mypos, start(ch8_snps))
rsids8 <- mcols(ch8_snps)$RefSNP_id[idx]
ch8$rsid=rsids8
ch8$rsid_rs=paste0("rs", ch8$rsid)
ch8.t = data.table(ch8, key="SNP")
ch9_snps <- getSNPlocs("ch9", as.GRanges=TRUE)
ch9=longevity_99[which(longevity_99$Chr=="9"),]
mypos <- ch9$Position
idx <- match(mypos, start(ch9_snps))
rsids9 <- mcols(ch9_snps)$RefSNP_id[idx]
ch9$rsid=rsids9
ch9$rsid_rs=paste0("rs", ch9$rsid)
ch9.t = data.table(ch9, key="SNP")
ch10_snps <- getSNPlocs("ch10", as.GRanges=TRUE)
ch10=longevity_99[which(longevity_99$Chr=="10"),]
mypos <- ch10$Position
idx <- match(mypos, start(ch10_snps))
rsids10 <- mcols(ch10_snps)$RefSNP_id[idx]
ch10$rsid=rsids10
ch10$rsid_rs=paste0("rs", ch10$rsid)
ch10.t = data.table(ch10, key="SNP")
ch11_snps <- getSNPlocs("ch11", as.GRanges=TRUE)
ch11=longevity_99[which(longevity_99$Chr=="11"),]
mypos <- ch11$Position
idx <- match(mypos, start(ch11_snps))
rsids11 <- mcols(ch11_snps)$RefSNP_id[idx]
ch11$rsid=rsids11
ch11$rsid_rs=paste0("rs", ch11$rsid)
ch11.t = data.table(ch11, key="SNP")
ch12_snps <- getSNPlocs("ch12", as.GRanges=TRUE)
ch12=longevity_99[which(longevity_99$Chr=="12"),]
mypos <- ch12$Position
idx <- match(mypos, start(ch12_snps))
rsids12 <- mcols(ch12_snps)$RefSNP_id[idx]
ch12$rsid=rsids12
ch12$rsid_rs=paste0("rs", ch12$rsid)
ch12.t = data.table(ch12, key="SNP")
ch13_snps <- getSNPlocs("ch13", as.GRanges=TRUE)
ch13=longevity_99[which(longevity_99$Chr=="13"),]
mypos <- ch13$Position
idx <- match(mypos, start(ch13_snps))
rsids13 <- mcols(ch13_snps)$RefSNP_id[idx]
ch13$rsid=rsids13
ch13$rsid_rs=paste0("rs", ch13$rsid)
ch13.t = data.table(ch13, key="SNP")
ch14_snps <- getSNPlocs("ch14", as.GRanges=TRUE)
ch14=longevity_99[which(longevity_99$Chr=="14"),]
mypos <- ch14$Position
idx <- match(mypos, start(ch14_snps))
rsids14 <- mcols(ch14_snps)$RefSNP_id[idx]
ch14$rsid=rsids14
ch14$rsid_rs=paste0("rs", ch14$rsid)
ch14.t = data.table(ch14, key="SNP")
ch15_snps <- getSNPlocs("ch15", as.GRanges=TRUE)
ch15=longevity_99[which(longevity_99$Chr=="15"),]
mypos <- ch15$Position
idx <- match(mypos, start(ch15_snps))
rsids15 <- mcols(ch15_snps)$RefSNP_id[idx]
ch15$rsid=rsids15
ch15$rsid_rs=paste0("rs", ch15$rsid)
ch15.t = data.table(ch15, key="SNP")
ch16_snps <- getSNPlocs("ch16", as.GRanges=TRUE)
ch16=longevity_99[which(longevity_99$Chr=="16"),]
mypos <- ch16$Position
idx <- match(mypos, start(ch16_snps))
rsids16 <- mcols(ch16_snps)$RefSNP_id[idx]
ch16$rsid=rsids16
ch16$rsid_rs=paste0("rs", ch16$rsid)
ch16.t = data.table(ch16, key="SNP")
ch17_snps <- getSNPlocs("ch17", as.GRanges=TRUE)
ch17=longevity_99[which(longevity_99$Chr=="17"),]
mypos <- ch17$Position
idx <- match(mypos, start(ch17_snps))
rsids17 <- mcols(ch17_snps)$RefSNP_id[idx]
ch17$rsid=rsids17
ch17$rsid_rs=paste0("rs", ch17$rsid)
ch17.t = data.table(ch17, key="SNP")
ch18_snps <- getSNPlocs("ch18", as.GRanges=TRUE)
ch18=longevity_99[which(longevity_99$Chr=="18"),]
mypos <- ch18$Position
idx <- match(mypos, start(ch18_snps))
rsids18 <- mcols(ch18_snps)$RefSNP_id[idx]
ch18$rsid=rsids18
ch18$rsid_rs=paste0("rs", ch18$rsid)
ch18.t = data.table(ch18, key="SNP")
ch19_snps <- getSNPlocs("ch19", as.GRanges=TRUE)
ch19=longevity_99[which(longevity_99$Chr=="19"),]
mypos <- ch19$Position
idx <- match(mypos, start(ch19_snps))
rsids19 <- mcols(ch19_snps)$RefSNP_id[idx]
ch19$rsid=rsids19
ch19$rsid_rs=paste0("rs", ch19$rsid)
ch19.t = data.table(ch19, key="SNP")
ch20_snps <- getSNPlocs("ch20", as.GRanges=TRUE)
ch20=longevity_99[which(longevity_99$Chr=="20"),]
mypos <- ch20$Position
idx <- match(mypos, start(ch20_snps))
rsids20 <- mcols(ch20_snps)$RefSNP_id[idx]
ch20$rsid=rsids20
ch20$rsid_rs=paste0("rs", ch20$rsid)
ch20.t = data.table(ch20, key="SNP")
ch21_snps <- getSNPlocs("ch21", as.GRanges=TRUE)
ch21=longevity_99[which(longevity_99$Chr=="21"),]
mypos <- ch21$Position
idx <- match(mypos, start(ch21_snps))
rsids21 <- mcols(ch21_snps)$RefSNP_id[idx]
ch21$rsid=rsids21
ch21$rsid_rs=paste0("rs", ch21$rsid)
ch21.t = data.table(ch21, key="SNP")
ch22_snps <- getSNPlocs("ch22", as.GRanges=TRUE)
ch22=longevity_99[which(longevity_99$Chr=="22"),]
mypos <- ch22$Position
idx <- match(mypos, start(ch22_snps))
rsids22 <- mcols(ch22_snps)$RefSNP_id[idx]
ch22$rsid=rsids22
ch22$rsid_rs=paste0("rs", ch22$rsid)
ch22.t = data.table(ch22, key="SNP")
# by=c("SNP","Chr","Position","EA","NEA",
# "EAF","Beta","SE","P.value","Effective_N",
# "rsid","rsid_rs")
rm(list=ls()[! ls() %in% c("ch1.t","ch2.t","ch3.t","ch4.t","ch5.t","ch6.t","ch7.t",
"ch8.t","ch9.t","ch10.t",
"ch11.t","ch12.t","ch13.t",
"ch14.t","ch15.t","ch16.t","ch17.t","ch18.t",
"ch19.t","ch20.t","ch21.t","ch22.t")])
# Merge together and remove NAs
merged <- rbind(ch1.t,ch2.t,ch3.t,ch4.t,ch5.t,ch6.t,ch7.t,ch8.t,
ch9.t,ch10.t,ch11.t,ch12.t,ch13.t,ch14.t,
ch15.t,ch16.t,ch17.t,ch18.t,ch19.t,ch20.t,ch21.t,ch22.t)
rm(list=ls()[! ls() %in% c("merged")])
dim(merged)
write.csv(merged, '/n/home04/cdadams/metab_longevity/extreme_longevity99/longevity_99_rsid.csv')
long_clean <- na.omit(merged)
dim(long_clean)
write.csv(long_clean, '/n/home04/cdadams/metab_longevity/extreme_longevity99/NA_gone_longevity_99_rsid.csv')
Phenotype.vec<-c(metab_qtls[,1])
Funk.1<-function(PHENO)
{
# Loop over all metabolites
exposure_dat <- format_metab_qtls(subset(metab_qtls, phenotype==PHENO))
# LD clumping
exposure_dat <- clump_data(exposure_dat)
outcome_dat <- read_outcome_data(
snps = exposure_dat$SNP,
filename = "/n/home04/cdadams/MR-eqtlGen-long/long_for_mr.txt",
sep = '\t',
snp_col = 'rsid',
beta_col = 'beta1',
se_col = 'se',
effect_allele_col = 'a1',
#phenotype_col = 'tissue',#tissue
#units_col = 'tissue',
other_allele_col = 'a0',
eaf_col = 'freq1',
#samplesize_col = 'samplesize',
#ncase_col = 'ncase',
#ncontrol_col = 'ncontrol',
gene_col = 'gene_name',
pval_col = 'p',
id_col = 'longevity'
)
# Harmonization
dat <- harmonise_data(exposure_dat, outcome_dat, action=2)
dat$outcome="longevity"
# Mr analysis
res <- mr(dat)
# Sensitivity analyses
het.dat<-mr_heterogeneity(dat)
p1 <- mr_scatter_plot(res, dat)
# Report
rep <- mr_report(dat)
return(list(res,het.dat,res, p1, rep))
}
outcome.test <- Funk.1(Phenotype.vec)
out<-outcome.test[1]
out<-data.frame(out)
# Format the results
out$exp_b <- exp(out$b)
out$or <- out$exp_b
out$metabolite <- out$exposure
out$lci1 <- out$b-(1.96*out$se)
out$lci <- exp(out$lci1)
out$uci1 <- out$b+(1.96*out$se)
out$uci <- exp(out$uci1)
dim(out)
write.csv(out, "/n/home04/cdadams/metab_longevity/lifespan/results/lifespan_results.csv")
sig.ivw=out[which(out$method=="Inverse variance weighted"),]
dim(sig.ivw) #115 IVW
# Add FDR and Bonferroni corrections
p=sig.ivw$pval
fdr=p.adjust(p, method = 'fdr', n = length(p))
sig.ivw$fdr=fdr
bon=p.adjust(p, method = 'bonferroni', n = length(p))
sig.ivw$bon=bon
head(sig.ivw, n=100)
bonsigs=sig.ivw[which(sig.ivw$bon<0.05),]
summary(bonsigs$b)
write.csv(sig.ivw, '/n/home04/cdadams/metab_longevity/lifespan/results/lifespan_sig.ivw.fdr.bon.csv')
write.csv(bonsigs, '/n/home04/cdadams/metab_longevity/lifespan/results/lifespan_bonsigs.csv')
## < table of extent 0 >
setwd("/n/home04/cdadams/metab_longevity/healthspan")
Phenotype.vec<-c(metab_qtls[,1])
Funk.1<-function(PHENO)
{
# Loop over all metabolites
exposure_dat <- format_metab_qtls(subset(metab_qtls, phenotype==PHENO))
# LD clumping
exposure_dat <- clump_data(exposure_dat)
outcome_dat <- read_outcome_data(
snps = exposure_dat$SNP,
filename = "/n/home04/cdadams/metab_longevity/healthspan/healthspan_summary.csv",
sep = ',',
snp_col = 'SNPID',
beta_col = 'beta',
se_col = 'se',
effect_allele_col = 'EA',
other_allele_col = 'RA',
eaf_col = 'EAF',
)
# Harmonization
dat <- harmonise_data(exposure_dat, outcome_dat, action=2)
dat$outcome="longevity"
# Mr analysis
res <- mr(dat)
# Sensitivity analyses
het.dat<-mr_heterogeneity(dat)
p1 <- mr_scatter_plot(res, dat)
# Report
rep <- mr_report(dat)
return(list(res,het.dat,res, p1, rep))
}
outcome.test <- Funk.1(Phenotype.vec)
out<-outcome.test[1]
out<-data.frame(out)
# Format the results
out$exp_b <- exp(out$b)
out$or <- out$exp_b
out$metabolite <- out$exposure
out$lci1 <- out$b-(1.96*out$se)
out$lci <- exp(out$lci1)
out$uci1 <- out$b+(1.96*out$se)
out$uci <- exp(out$uci1)
dim(out)
write.csv(out, "/n/home04/cdadams/metab_longevity/healthspan/results/healthspan_results.csv")
sig.ivw=out[which(out$method=="Inverse variance weighted"),]
dim(sig.ivw) #115 IVW
# Add FDR and Bonferroni corrections
p=sig.ivw$pval
fdr=p.adjust(p, method = 'fdr', n = length(p))
sig.ivw$fdr=fdr
bon=p.adjust(p, method = 'bonferroni', n = length(p))
sig.ivw$bon=bon
head(sig.ivw, n=100)
bonsigs=sig.ivw[which(sig.ivw$bon<0.05),]
summary(bonsigs$b)
write.csv(sig.ivw, '/n/home04/cdadams/metab_longevity/healthspan/results/healthspan_sig.ivw.fdr.bon.csv')
write.csv(bonsigs, '/n/home04/cdadams/metab_longevity/healthspan/results/healthspan_bonsigs.csv')
# Saved in wrong directory initially
mv *.html /n/home04/cdadams/metab_longevity/lifespan/results
mv *mr_report.md /n/home04/cdadams/metab_longevity/lifespan/results
mv index.html /n/home04/cdadams/metab_longevity/extreme_longevity
## < table of extent 0 >
setwd("/n/home04/cdadams/metab_longevity/extreme_longevity")
Phenotype.vec<-c(metab_qtls[,1])
Funk.1<-function(PHENO)
{
# Loop over all metabolites
exposure_dat <- format_metab_qtls(subset(metab_qtls, phenotype==PHENO))
# LD clumping
exposure_dat <- clump_data(exposure_dat)
outcome_dat <- read_outcome_data(
snps = exposure_dat$SNP,
filename = "/n/home04/cdadams/metab_longevity/extreme_longevity/NA_gone_longevity_90_rsid.csv",
sep = ',',
snp_col = 'rsid_rs',
beta_col = 'Beta',
se_col = 'SE',
effect_allele_col = 'EA',
#phenotype_col = 'tissue',#tissue
#units_col = 'tissue',
other_allele_col = 'NEA',
eaf_col = 'EAF',
samplesize_col = 'Effective_N',
#ncase_col = 'ncase',
#ncontrol_col = 'ncontrol',
#gene_col = 'gene_name',
pval_col = 'P.value',
id_col = 'extreme longevity (90th percentile)'
)
# Harmonization
dat <- harmonise_data(exposure_dat, outcome_dat, action=2)
dat$outcome="extreme longevity (90th percentile)"
# Mr analysis
res <- mr(dat)
# Sensitivity analyses
het.dat<-mr_heterogeneity(dat)
p1 <- mr_scatter_plot(res, dat)
# Report
rep <- mr_report(dat)
return(list(res,het.dat,res, p1, rep))
}
outcome.test <- Funk.1(Phenotype.vec)
out<-outcome.test[1]
out<-data.frame(out)
# Format the results
out$exp_b <- exp(out$b)
out$or <- out$exp_b
out$metabolite <- out$exposure
out$lci1 <- out$b-(1.96*out$se)
out$lci <- exp(out$lci1)
out$uci1 <- out$b+(1.96*out$se)
out$uci <- exp(out$uci1)
dim(out)
write.csv(out, "/n/home04/cdadams/metab_longevity/extreme_longevity/extreme_longevity90th_results.csv")
sig.ivw=out[which(out$method=="Inverse variance weighted"),]
dim(sig.ivw) # IVW
# Add FDR and Bonferroni corrections
p=sig.ivw$pval
fdr=p.adjust(p, method = 'fdr', n = length(p))
sig.ivw$fdr=fdr
bon=p.adjust(p, method = 'bonferroni', n = length(p))
sig.ivw$bon=bon
head(sig.ivw, n=100)
bonsigs=sig.ivw[which(sig.ivw$bon<0.05),]
summary(bonsigs$b)
write.csv(sig.ivw, '/n/home04/cdadams/metab_longevity/extreme_longevity/results/extreme_longevity90th_sig.ivw.fdr.bon.csv')
write.csv(bonsigs, '/n/home04/cdadams/metab_longevity/extreme_longevity/results/extreme_longevity90th_bonsigs.csv')
# Saved in wrong directory initially
mv *.html /n/home04/cdadams/metab_longevity/lifespan/results
mv *mr_report.md /n/home04/cdadams/metab_longevity/lifespan/results
mv index.html /n/home04/cdadams/metab_longevity/extreme_longevity
## [1] 29 19
## < table of extent 0 >
setwd("/n/home04/cdadams/metab_longevity/extreme_longevity99")
Phenotype.vec<-c(metab_qtls[,1])
Funk.1<-function(PHENO)
{
# Loop over all metabolites
exposure_dat <- format_metab_qtls(subset(metab_qtls, phenotype==PHENO))
# LD clumping
exposure_dat <- clump_data(exposure_dat)
outcome_dat <- read_outcome_data(
snps = exposure_dat$SNP,
filename = "/n/home04/cdadams/metab_longevity/extreme_longevity99/NA_gone_longevity_99_rsid.csv",
sep = ',',
snp_col = 'rsid_rs',
beta_col = 'Beta',
se_col = 'SE',
effect_allele_col = 'EA',
#phenotype_col = 'tissue',#tissue
#units_col = 'tissue',
other_allele_col = 'NEA',
eaf_col = 'EAF',
samplesize_col = 'Effective_N',
#ncase_col = 'ncase',
#ncontrol_col = 'ncontrol',
#gene_col = 'gene_name',
pval_col = 'P.value',
id_col = 'extreme longevity (99th percentile)'
)
# Harmonization
dat <- harmonise_data(exposure_dat, outcome_dat, action=2)
dat$outcome="longevity"
# Mr analysis
res <- mr(dat)
# Sensitivity analyses
het.dat<-mr_heterogeneity(dat)
p1 <- mr_scatter_plot(res, dat)
# Report
rep <- mr_report(dat)
return(list(res,het.dat,res, p1, rep))
}
outcome.test <- Funk.1(Phenotype.vec)
out<-outcome.test[1]
out<-data.frame(out)
# Format the results
out$exp_b <- exp(out$b)
out$or <- out$exp_b
out$metabolite <- out$exposure
out$lci1 <- out$b-(1.96*out$se)
out$lci <- exp(out$lci1)
out$uci1 <- out$b+(1.96*out$se)
out$uci <- exp(out$uci1)
dim(out)
write.csv(out, "/n/home04/cdadams/metab_longevity/extreme_longevity99/results/extreme_longevity99_results.csv")
sig.ivw=out[which(out$method=="Inverse variance weighted"),]
dim(sig.ivw) # IVW
# Add FDR and Bonferroni corrections
p=sig.ivw$pval
fdr=p.adjust(p, method = 'fdr', n = length(p))
sig.ivw$fdr=fdr
bon=p.adjust(p, method = 'bonferroni', n = length(p))
sig.ivw$bon=bon
head(sig.ivw, n=100)
bonsigs=sig.ivw[which(sig.ivw$bon<0.05),]
summary(bonsigs$b)
write.csv(sig.ivw, '/n/home04/cdadams/metab_longevity/extreme_longevity99/results/extreme_longevity99th_sig.ivw.fdr.bon.csv')
write.csv(bonsigs, '/n/home04/cdadams/metab_longevity/extreme_longevity99/results/extreme_longevity99th_bonsigs.csv')
# Saved in wrong directory initially
mv *.html /n/home04/cdadams/metab_longevity/lifespan/results
mv *mr_report.md /n/home04/cdadams/metab_longevity/lifespan/results
mv index.html /n/home04/cdadams/metab_longevity/extreme_longevity
## < table of extent 0 >
## [1] "L.LDL.P" "ApoB" "L.LDL.CE" "L.LDL.C" "L.LDL.FC" "Est.C" "L.LDL.PL"
## [8] "M.LDL.PL" "L.LDL.L"
## $lifespan
## [1] "XS.VLDL.P" "M.LDL.C" "IDL.L" "L.LDL.P" "XS.VLDL.L"
## [6] "ApoB" "IDL.TG" "XS.VLDL.TG" "M.LDL.L" "L.LDL.CE"
## [11] "IDL.FC" "M.LDL.CE" "Free.C" "S.LDL.P" "L.LDL.C"
## [16] "L.LDL.FC" "M.VLDL.C" "IDL.P" "S.LDL.L" "S.LDL.C"
## [21] "XS.VLDL.PL" "Est.C" "L.LDL.PL" "SM" "M.LDL.PL"
## [26] "IDL.C" "Serum.C" "FAw6" "L.LDL.L" "LDL.C"
## [31] "M.LDL.P" "IDL.PL"
##
## $healthspan
## [1] "ApoB" "Est.C" "M.LDL.PL" "L.LDL.L" "L.LDL.PL" "L.LDL.CE"
## [7] "L.LDL.FC" "L.LDL.P" "S.VLDL.L" "L.LDL.C"
##
## $long90
## [1] "IDL.PL" "IDL.FC" "IDL.TG" "L.LDL.FC" "ApoB"
## [6] "M.LDL.C" "L.LDL.CE" "SM" "M.LDL.L" "XS.VLDL.PL"
## [11] "IDL.C" "XS.VLDL.P" "Est.C" "LDL.C" "M.LDL.PL"
## [16] "S.LDL.P" "S.LDL.C" "XS.VLDL.L" "IDL.L" "L.LDL.P"
## [21] "IDL.P" "Serum.C" "L.LDL.PL" "S.LDL.L" "L.LDL.L"
## [26] "L.LDL.C" "M.LDL.P" "M.LDL.CE" "Free.C"
##
## $long99
## [1] "M.LDL.CE" "S.LDL.L" "Est.C" "L.LDL.L" "S.LDL.P"
## [6] "L.LDL.FC" "IDL.FC" "M.LDL.P" "L.LDL.P" "ApoB"
## [11] "M.LDL.L" "LDL.C" "M.LDL.PL" "M.LDL.C" "IDL.PL"
## [16] "Serum.C" "IDL.L" "IDL.C" "S.LDL.C" "XS.VLDL.P"
## [21] "L.LDL.CE" "L.LDL.C" "IDL.P" "XS.VLDL.PL" "L.LDL.PL"
Taking the negation puts the the healthspan measure on the same scale with the same interpretation as the other three longevity measures. It makes it a protective ratio, like the lifespan measure.
setwd("/n/home04/cdadams/metab_longevity/healthspan")
negation=read.table("/n/home04/cdadams/metab_longevity/healthspan/healthspan_summary.csv", header = TRUE, sep = ",", dec = ".")
head(negation)
negation$negation = negation$beta*(-1)
write.csv(negation, "/n/home04/cdadams/metab_longevity/healthspan/negation.csv")
Phenotype.vec<-c(metab_qtls[,1])
Funk.1<-function(PHENO)
{
# Loop over all metabolites
exposure_dat <- format_metab_qtls(subset(metab_qtls, phenotype==PHENO))
# LD clumping
exposure_dat <- clump_data(exposure_dat)
outcome_dat <- read_outcome_data(
snps = exposure_dat$SNP,
filename = "/n/home04/cdadams/metab_longevity/healthspan/negation.csv",
sep = ',',
snp_col = 'SNPID',
beta_col = 'negation',
se_col = 'se',
effect_allele_col = 'EA',
other_allele_col = 'RA',
eaf_col = 'EAF',
)
# Harmonization
dat <- harmonise_data(exposure_dat, outcome_dat, action=2)
dat$outcome="longevity"
# Mr analysis
res <- mr(dat)
# Sensitivity analyses
het.dat<-mr_heterogeneity(dat)
p1 <- mr_scatter_plot(res, dat)
# Report
rep <- mr_report(dat)
return(list(res,het.dat,res, p1, rep))
}
outcome.test <- Funk.1(Phenotype.vec)
out<-outcome.test[1]
out<-data.frame(out)
# Format the results
out$exp_b <- exp(out$b)
out$or <- out$exp_b
out$metabolite <- out$exposure
out$lci1 <- out$b-(1.96*out$se)
out$lci <- exp(out$lci1)
out$uci1 <- out$b+(1.96*out$se)
out$uci <- exp(out$uci1)
dim(out)
write.csv(out, "/n/home04/cdadams/metab_longevity/healthspan/results/NEGATION_healthspan_results.csv")
sig.ivw=out[which(out$method=="Inverse variance weighted"),]
dim(sig.ivw) #115 IVW
# Add FDR and Bonferroni corrections
p=sig.ivw$pval
fdr=p.adjust(p, method = 'fdr', n = length(p))
sig.ivw$fdr=fdr
bon=p.adjust(p, method = 'bonferroni', n = length(p))
sig.ivw$bon=bon
head(sig.ivw, n=100)
bonsigs=sig.ivw[which(sig.ivw$bon<0.05),]
summary(bonsigs$b)
write.csv(sig.ivw, '/n/home04/cdadams/metab_longevity/healthspan/results/NEGATION_healthspan_sig.ivw.fdr.bon.csv')
write.csv(bonsigs, '/n/home04/cdadams/metab_longevity/healthspan/results/NEGATION_healthspan_bonsigs.csv')
# Saved in wrong directory initially
mv *.html /n/home04/cdadams/metab_longevity/lifespan/results
mv *mr_report.md /n/home04/cdadams/metab_longevity/lifespan/results
mv index.html /n/home04/cdadams/metab_longevity/extreme_longevity
## < table of extent 0 >