Appendix
##Data preparation steps
#set working directory
"C:/Users/ladki/Desktop/AD_Project"
#_extract relevant datasets from
#https://www.ncbi.nlm.nih.gov/gap/phegeni
#_load dependencies
library(readxl) #to call in excel datafile
library(kableExtra) #to create tables
library(tidyverse) #for data manipulation
library(dplyr) #for data manipulation
#install.packages("devtools") #for referencing with APA 7th ed.
#devtools::install_github("crsh/papaja") #for referencing with APA
#remotes::install_github("crsh/papaja@devel") #for referencing with APA 7th ed.
library(tinytex) #for referencing with APA 7th ed and papaja.
library(papaja) #for referencing with APA 7th ed.
r_refs("r-adRef.bib") #reference .bib r text file
#install.packages("ggplot2")
library(ggplot2) #for data visualisation
library(ggrepel) #for data visualisation
library(forcats) #for data manipulation
library(scales) #for data manipulation
#if (!require("BiocManager", quietly = TRUE))
#install.packages("BiocManager")
#BiocManager::install("mixOmics")
library(mixOmics) #for pca
#_call-in data files
ad<-read_excel("./data/AD.xlsx", sheet=1) #AD data
searchSummary<-read_excel("./data/AD.xlsx", sheet=2) #searchSummary data
studyDesigns<-read_excel("./data/AD.xlsx", sheet=3) #studyDesign data
#_research data summary
#research data distribution - search summary, table 1
s<-"" #create variable for blank
searchSummary[is.na(searchSummary)]<-s#rename NA
#create searchSummary table
searchSummary %>%
kbl(caption = "_Seach summary for Alzhermer's disease from PheGenI_") %>% #Table title
kable_classic(full_width=F, html_font = "Calibri") %>% #output width and font
kable_styling(table.envir = "ctable", font_size=16) %>% #output fontSize
row_spec(0, bold=T, color="white", background = "gray") #output heading and body layout
#research data distribution - study designs, table 2
studyDesigns<-as.data.frame(studyDesigns)
studyDesigns[is.na(studyDesigns)]<-s#rename NA
#remove number column in studyDesigns
studyDesigns<-studyDesigns[, 2:ncol(studyDesigns)]
#create studyDesigns table
studyDesigns %>%
kbl(caption = "_Summary of study designs for Alzheimer's disease from PheGenI_") %>% #Table title
kable_classic(full_width=F, html_font = "Calibri") %>% #output width and font
kable_styling(table.envir = "ctable", font_size=16) %>% #output fontSize
row_spec(0, bold=T, color="white", background = "gray") #output heading and body layout
#_exploratory data analysis
#total snprs distribution, table 3
#data distribution for the number of snprs for each ethnic group
p<-table(ad$Population) #create ethnicity summary
rownames (p)[rownames(p)=="NR"]= ("***NR") #change row name label
p<-data.frame(p) #dataframe ethnicity summary
p<-rename(p, Ethnic_group=Var1) #rename Var1 as Ethnic_group
p<-rename(p, Research_summary=Freq) #rename Freq to Research_summary
p<-p[order(p$Research_summary, decreasing=TRUE),] #change Research_summary count to descending order
studyPbar_set<-p #duplicate p for bar plot
p[9, ] <- c("" , "Total = 873", "") #create row for total
p <- sapply(p, as.character) #convert to as.character
p[is.na(p)] <-""#replace NA with blank
p<-data.frame(p) #convert to dataframe
rownames(p)<- c() #remove row names
#create table
p %>%
kbl(caption = "_SNP rs counts for each ethnic group in association with Alzheimer's disease_") %>% #Table title
kable_classic(full_width=T, html_font = "Calibri") %>% #output width and font
kable_styling(table.envir = "ctable", font_size=16) %>% #output fontSize
row_spec(0, bold=T, color="white", background = "gray") #%>% #output heading and body layout
#row_spec(2, strikeout = T) #strikeout row 2
#total snprs distribution, figure 1
#create research summary bar plot
studyPbar_set %>% #create pipeline to allocate percentages
mutate(Percent = percent(Research_summary / sum(Research_summary))) -> studyPbar_set
studyPbar<-ggplot(studyPbar_set, aes(x=reorder(Ethnic_group, -Research_summary), y=Research_summary, fill=("Percent"))) +
geom_col() + #add color to bar
geom_text(
aes(label=Percent), #add percentage
hjust=0.5, vjust=-0.5, size=5) + #adjust percentage text position
scale_y_continuous(limits=c(NA, 450)) + #length of y-axis
labs(title=expression(underline(bold(The~"SNP rs and Percentage scores for ethnic groups of Alzheimer's disease")))) +
labs(x=expression(bold("Ethnic group")), y=expression(bold("Number of SNP rs"))) +#underlined title
theme(plot.title.position="plot") +
theme_classic() +
theme(axis.text.x = element_text(angle=-90, vjust=0.5, hjust=0.5, size=14)) +
theme(axis.text.y = element_text(angle=-90, vjust=0.5, hjust=0.5, size=14)) +
theme(
plot.title = element_text((hjust=0.7), size = 16), #adjust plot title position
axis.title.x = element_text(vjust=-1, size=14), #adjust x-axis title position
axis.title.y = element_text(vjust=1.8, size=14))
studyPbar
#snprs data distribution with duplicates removed, table 4
#Identify unique SNPs for each ethnic group
#main dataset "ad" manipulation
ad$'P-Value' <- format(ad$'P-Value', scientific=F) #turn-off scientific notation for P-Value
ad <- filter(ad, 'P-Value' >=0.00001) #filter P-Value <1*10^-5, all 17 variables and 873 obs retained
#assign numeric levels to population - factor levels
snpAll<-ad
snpAll$Population <- ifelse(snpAll$Population=="African|African American", 0,
ifelse(snpAll$Population=="African|European|African American|Indian American", 1,
ifelse(snpAll$Population=="East Asian|Asian|European", 2,
ifelse(snpAll$Population=="East Asian|European", 3,
ifelse(snpAll$Population=="European", 4,
ifelse(snpAll$Population=="European|Hispanic", 5,
ifelse(snpAll$Population=="Hispanic", 6, NA)))))))
snpAll$Population <- factor(snpAll$Population,
levels=c(0,1,2,3,4,5,6))
#labels=c("African|African American", "African|European|African American|Indian American", "East Asian|Asian|European", "East Asian|European", "European", "European|Hispanic", "Hispanic"))
#subset each ethnic group and remove duplicates
#Ethnic group - European "p_e"
p_e <-subset(snpAll, Population==4) #17 variables, 431 obs
p_e <- p_e[!duplicated(p_e[c('SNP rs')]),] #remove duplicate SNPrs, 17 variables, 394 obs
#Ethnic group - American|African American "p_fm"
p_fm <- subset(snpAll, Population==0) #17 variables, 139 obs
p_fm <- p_fm[!duplicated(p_fm[c('SNP rs')]),] #remove duplicate SNPrs, 17 variables, 134 obs
#Ethnic group - African|European|African American|Indian American "p_femi"
p_femi <- subset(snpAll, Population==1) #17 variables, 18 obs
p_femi <- p_femi[!duplicated(p_femi[c('SNP rs')]),] #remove duplicate SNPrs, 17 variables, 18 obs
#Ethnic group - East Asian|European "p_se"
p_se <- subset(snpAll, Population==3) #17 variables, 3 obs
p_se <- p_se[!duplicated(p_se[c('SNP rs')]),] #remove duplicate SNPrs, 17 variables, 3 obs
#Ethnic group - Hispanic "p_h"
p_h <- subset(snpAll, Population==6) #17 variables, 3 obs
p_h <- p_h[!duplicated(p_h[c('SNP rs')]),] #remove duplicate SNPrs, 17 variables, 3 obs
#Ethnic group - East Asian|Asian|European "p_sne"
p_sne <- subset(snpAll, Population==2) #17 variables, 2 obs
p_sne <- p_sne[!duplicated(p_sne[c('SNP rs')]),] #remove duplicate SNPrs, 17 variables, 2 obs
#Ethnic group - European|Hispanic "p_eh"
p_eh <- subset(snpAll, Population==5) #17 variables, 2 obs
p_eh <- p_eh[!duplicated(p_eh[c('SNP rs')]),] #remove duplicate SNPrs, 17 variables, 2 obs
#re-group the reduced SNPrs of each ethnic group to a new df
snpReduced<-bind_rows(p_e, p_fm, p_femi, p_se, p_h, p_sne, p_eh) #bind all SNPrs from individual ethnic groups, 17 variables,556 obs
snpRtable<-table(snpReduced$Population) #summarise SNPrs for each ethnic group
snpRtable<-data.frame(snpRtable) #dataframe SNPrs summary
snpRtable<-rename(snpRtable, Ethnic_group=Var1) #rename Var1 as Ethnic_group
snpRtable<-rename(snpRtable, SNPrs_summary=Freq) #rename Freq to Research_summary
snpRtable$Ethnic_group <- factor(snpRtable$Ethnic_group, #factor labels for levels
levels=c(0,1,2,3,4,5,6),
labels=c("African|African American", "African|European|African American|Indian American", "East Asian|Asian|European", "East Asian|European", "European", "European|Hispanic", "Hispanic"))
snpRtable<-snpRtable[order(snpRtable$SNPrs_summary, decreasing=TRUE),] #change SNPrs_summary count to descending order
snpRpie_set<-snpRtable #duplicate snpRtable for pie chart
snpRtable[8, ] <- c("" , "Total = 556", "") #create row for total
snpRtable <- sapply(snpRtable, as.character) #convert to as.character
snpRtable[is.na(snpRtable)] <-""#replace NA with blank
snpRtable<-data.frame(snpRtable) #convert to dataframe
rownames(snpRtable)<- c() #remove row names
#create table summary for SNPrs of each ethnic group
snpRtable %>%
kbl(caption = "_SNP rs summary, with duplicates removed, for each ethnic group associated with Alzheimer's disease_") %>% #table title
kable_classic(full_width=T, html_font = "Calibri") %>% #output width and font
kable_styling(table.envir = "ctable", font_size=16) %>% #output fontSize
row_spec(0, bold=T, color="white", background = "gray")
#snprs data distribution with duplicates removed, figure 2
#create pie chart for reduced SNPrs distribution
snpRpie_set %>% #create pipeline to allocate percentages
mutate(Percentage = percent(SNPrs_summary / sum(SNPrs_summary))) -> snpRpie_set
snpRpie <- ggplot(snpRpie_set, aes(x = "", y = SNPrs_summary, fill = fct_inorder(Ethnic_group))) + #order by ethnic group
geom_col(width = 1, color = 1) +
coord_polar(theta="y") +
geom_label_repel(aes(label = Percentage), size=3, show.legend = F, nudge_x = 1) + #fill pie with percentages
guides(fill = guide_legend(title ="Ethnic group")) +
labs(title=expression(underline("SNP rs percentage distribution, with duplicates removed, for each Ethnic group"))) +
theme(plot.title.position="plot") +
theme_void()
snpRpie <- snpRpie +
theme(
plot.title = element_text(hjust=-1)) #adjust plot title position
snpRpie
#_identification of unique snprs
#unique snprs summary - table 5
#find unmatched unique SNPrs for each ethnic group
#Q = unique
snpQ_e<-p_e #unique SNPrs - European population "e", 17 variables, 394 obs
snpQ_fm<-p_fm[!(p_fm$`SNP rs` %in% snpQ_e$`SNP rs`),] #unique SNPrs - African|African American population "fm", 17 variables, 133 obs
snpQ_femi<-p_femi[!(p_femi$`SNP rs` %in% snpQ_e$`SNP rs`),] #unique SNPrs - African|European|African American|Indian American "femi", 17 variables, 18 obs
snpQ_se<-p_se[!(p_se$`SNP rs` %in% snpQ_e$`SNP rs`),]#unique SNPrs - East Asian|European "se", 17 variables, 3 obs
snpQ_h<-p_h[!(p_h$`SNP rs` %in% snpQ_e$`SNP rs`),]#unique SNPrs - Hispanic "h", 17 variables, 3 obs
snpQ_sne<-p_sne[!(p_sne$`SNP rs` %in% snpQ_e$`SNP rs`),]#unique SNPrs - East Asian|Asian|European "sne", 17 variables, 1 obs
snpQ_eh<-p_eh[!(p_eh$`SNP rs` %in% snpQ_e$`SNP rs`),]#unique SNPrs - European|Hispanic "eh", 17 variables, 1 obs
#group the unmatched unique SNPrs of each ethnic group
snpQ<-bind_rows(snpQ_e, snpQ_fm, snpQ_femi, snpQ_se, snpQ_h, snpQ_sne, snpQ_eh) #bind all unique SNPrs from individual ethnic groups, 17 variables,553 obs
snpQtable<-table(snpQ$Population) #summarise unique SNPrs for each ethnic group
snpQtable<-data.frame(snpQtable) #dataframe SNPrs summary
snpQtable<-rename(snpQtable, Ethnic_group=Var1) #rename Var1 as Ethnic_group
snpQtable<-rename(snpQtable, Unique_SNPrs_summary=Freq) #rename Freq to Unique SNPrs summary
snpQtable$Ethnic_group <- factor(snpQtable$Ethnic_group, #factor labels for levels
levels=c(0,1,2,3,4,5,6),
labels=c("African|African American", "African|European|African American|Indian American", "East Asian|Asian|European", "East Asian|European", "European", "European|Hispanic", "Hispanic"))
snpQtable<-snpQtable[order(snpQtable$Unique_SNPrs_summary, decreasing=TRUE),] #change Unique SNPrs summary count to descending order
snpQtable %>% #create pipeline to allocate percentage scores
mutate(Percentage = percent(Unique_SNPrs_summary / sum(Unique_SNPrs_summary))) -> snpQtable
snpQtable[8, ] <- c("" , "Total = 553", "") #create row for total
snpQtable <- sapply(snpQtable, as.character) #convert to as.character
snpQtable[is.na(snpQtable)] <-""#replace NA with blank
snpQtable<-data.frame(snpQtable) #convert to dataframe
rownames(snpQtable)<- c() #remove row names
#create table summary for SNPrs of each ethnic group
snpQtable %>%
kbl(caption = "_Unique SNP rs summary for each ethnic group associated with Alzheimer's disease_") %>% #table title
kable_classic(full_width=T, html_font = "Calibri") %>% #output width and font
kable_styling(table.envir = "ctable", font_size=16) %>% #output fontSize
row_spec(0, bold=T, color="white", background = "gray") #%>% #output heading and body layout
#principal component analysis - figure 3
#create pca plot
c<-c(3, 11, 17) #create specific column selection for pca
snpQpca_set<-snpQ[,c] #save specific columns to new variable
snpQpca_set$Population <- factor(snpQpca_set$Population, #factor labels for levels
levels=c(0,1,2,3,4,5,6),
labels=c("African|African American", "African|European|African American|Indian American", "East Asian|Asian|European", "East Asian|European", "European", "European|Hispanic", "Hispanic"))
snpQpca_set$`P-Value`<-qnorm(as.numeric(snpQpca_set$`P-Value`)) #convert p-values to Z scores
snpQpca<-pca(snpQpca_set)#run method for pca
plotIndiv(snpQpca, group = snpQpca_set$Population, legend = TRUE, title = "SNP rs, PCA", size.title = rel(1.3), legend.title = "Ethnic group") #plots samples for pca
#unique snprs and chromosome summary - table 6
#snprs to chromosome distribution for all ethnic groups together
snpQchr_All<-as.data.frame(table(snpQ$Chromosome)) #create df to explore snprs to chromosome distribution
snpQchr_All<-rename(snpQchr_All, Chromosome=Var1) #rename Var1 as Chromosome
snpQchr_All<-rename(snpQchr_All, count=Freq) #rename Freq as count
#assign levels to chromosomes
snpQchr_All$Chromosome<- ifelse(snpQchr_All$Chromosome=="1", 1,
ifelse(snpQchr_All$Chromosome=="2", 2,
ifelse(snpQchr_All$Chromosome=="3", 3,
ifelse(snpQchr_All$Chromosome=="4", 4,
ifelse(snpQchr_All$Chromosome=="5", 5,
ifelse(snpQchr_All$Chromosome=="6", 6,
ifelse(snpQchr_All$Chromosome=="7", 7,
ifelse(snpQchr_All$Chromosome=="8", 8,
ifelse(snpQchr_All$Chromosome=="9", 9,
ifelse(snpQchr_All$Chromosome=="10", 10,
ifelse(snpQchr_All$Chromosome=="11", 11,
ifelse(snpQchr_All$Chromosome=="12", 12,
ifelse(snpQchr_All$Chromosome=="13", 13,
ifelse(snpQchr_All$Chromosome=="14", 14,
ifelse(snpQchr_All$Chromosome=="15", 15,
ifelse(snpQchr_All$Chromosome=="16", 16,
ifelse(snpQchr_All$Chromosome=="17", 17,
ifelse(snpQchr_All$Chromosome=="18", 18,
ifelse(snpQchr_All$Chromosome=="19", 19,
ifelse(snpQchr_All$Chromosome=="20", 20,
ifelse(snpQchr_All$Chromosome=="21", 21,
ifelse(snpQchr_All$Chromosome=="22", 22,
ifelse(snpQchr_All$Chromosome=="X", 23,NA)))))))))))))))))))))))
snpQchr_All<-snpQchr_All[order(snpQchr_All$Chromosome),] #change chromosome to ascending order
#snprs to chromosome distribution for "e"
snpQchr_e<-as.data.frame(table(snpQ_e$Chromosome))
snpQchr_e<-rename(snpQchr_e, Chromosome=Var1) #rename Var1 as Chromosome
snpQchr_e<-rename(snpQchr_e, count=Freq) #rename Freq as count
#assign levels to chromosomes
snpQchr_e$Chromosome<- ifelse(snpQchr_e$Chromosome=="1", 1,
ifelse(snpQchr_e$Chromosome=="2", 2,
ifelse(snpQchr_e$Chromosome=="3", 3,
ifelse(snpQchr_e$Chromosome=="4", 4,
ifelse(snpQchr_e$Chromosome=="5", 5,
ifelse(snpQchr_e$Chromosome=="6", 6,
ifelse(snpQchr_e$Chromosome=="7", 7,
ifelse(snpQchr_e$Chromosome=="8", 8,
ifelse(snpQchr_e$Chromosome=="9", 9,
ifelse(snpQchr_e$Chromosome=="10", 10,
ifelse(snpQchr_e$Chromosome=="11", 11,
ifelse(snpQchr_e$Chromosome=="12", 12,
ifelse(snpQchr_e$Chromosome=="13", 13,
ifelse(snpQchr_e$Chromosome=="14", 14,
ifelse(snpQchr_e$Chromosome=="15", 15,
ifelse(snpQchr_e$Chromosome=="16", 16,
ifelse(snpQchr_e$Chromosome=="17", 17,
ifelse(snpQchr_e$Chromosome=="18", 18,
ifelse(snpQchr_e$Chromosome=="19", 19,
ifelse(snpQchr_e$Chromosome=="20", 20,
ifelse(snpQchr_e$Chromosome=="21", 21,
ifelse(snpQchr_e$Chromosome=="22", 22,
ifelse(snpQchr_e$Chromosome=="X", 23,NA)))))))))))))))))))))))
snpQchr_e<-snpQchr_e[order(snpQchr_e$Chromosome),] #change chromosome to ascending order
#snprs to chromosome distribution for "fm"
snpQchr_fm<-as.data.frame(table(snpQ_fm$Chromosome))
snpQchr_fm<-rename(snpQchr_fm, Chromosome=Var1) #rename Var1 as Chromosome
snpQchr_fm<-rename(snpQchr_fm, count=Freq) #rename Freq as count
#assign levels to chromosomes
snpQchr_fm$Chromosome<- ifelse(snpQchr_fm$Chromosome=="1", 1,
ifelse(snpQchr_fm$Chromosome=="2", 2,
ifelse(snpQchr_fm$Chromosome=="3", 3,
ifelse(snpQchr_fm$Chromosome=="4", 4,
ifelse(snpQchr_fm$Chromosome=="5", 5,
ifelse(snpQchr_fm$Chromosome=="6", 6,
ifelse(snpQchr_fm$Chromosome=="7", 7,
ifelse(snpQchr_fm$Chromosome=="8", 8,
ifelse(snpQchr_fm$Chromosome=="9", 9,
ifelse(snpQchr_fm$Chromosome=="10", 10,
ifelse(snpQchr_fm$Chromosome=="11", 11,
ifelse(snpQchr_fm$Chromosome=="12", 12,
ifelse(snpQchr_fm$Chromosome=="13", 13,
ifelse(snpQchr_fm$Chromosome=="14", 14,
ifelse(snpQchr_fm$Chromosome=="15", 15,
ifelse(snpQchr_fm$Chromosome=="16", 16,
ifelse(snpQchr_fm$Chromosome=="17", 17,
ifelse(snpQchr_fm$Chromosome=="18", 18,
ifelse(snpQchr_fm$Chromosome=="19", 19,
ifelse(snpQchr_fm$Chromosome=="20", 20,
ifelse(snpQchr_fm$Chromosome=="21", 21,
ifelse(snpQchr_fm$Chromosome=="22", 22,
ifelse(snpQchr_fm$Chromosome=="X", 23,NA)))))))))))))))))))))))
snpQchr_fm<-snpQchr_fm[order(snpQchr_fm$Chromosome),] #change chromosome to ascending order
#snprs to chromosome distribution for "femi"
snpQchr_femi<-as.data.frame(table(snpQ_femi$Chromosome))
snpQchr_femi<-rename(snpQchr_femi, Chromosome=Var1) #rename Var1 as Chromosome
snpQchr_femi<-rename(snpQchr_femi, count=Freq) #rename Freq as count
#assign levels to chromosomes
snpQchr_femi$Chromosome<- ifelse(snpQchr_femi$Chromosome=="1", 1,
ifelse(snpQchr_femi$Chromosome=="2", 2,
ifelse(snpQchr_femi$Chromosome=="3", 3,
ifelse(snpQchr_femi$Chromosome=="4", 4,
ifelse(snpQchr_femi$Chromosome=="5", 5,
ifelse(snpQchr_femi$Chromosome=="6", 6,
ifelse(snpQchr_femi$Chromosome=="7", 7,
ifelse(snpQchr_femi$Chromosome=="8", 8,
ifelse(snpQchr_femi$Chromosome=="9", 9,
ifelse(snpQchr_femi$Chromosome=="10", 10,
ifelse(snpQchr_femi$Chromosome=="11", 11,
ifelse(snpQchr_femi$Chromosome=="12", 12,
ifelse(snpQchr_femi$Chromosome=="13", 13,
ifelse(snpQchr_femi$Chromosome=="14", 14,
ifelse(snpQchr_femi$Chromosome=="15", 15,
ifelse(snpQchr_femi$Chromosome=="16", 16,
ifelse(snpQchr_femi$Chromosome=="17", 17,
ifelse(snpQchr_femi$Chromosome=="18", 18,
ifelse(snpQchr_femi$Chromosome=="19", 19,
ifelse(snpQchr_femi$Chromosome=="20", 20,
ifelse(snpQchr_femi$Chromosome=="21", 21,
ifelse(snpQchr_femi$Chromosome=="22", 22,
ifelse(snpQchr_femi$Chromosome=="X", 23,NA)))))))))))))))))))))))
snpQchr_femi<-snpQchr_femi[order(snpQchr_femi$Chromosome),] #change chromosome to ascending order
#snprs to chromosome distribution for "se"
snpQchr_se<-as.data.frame(table(snpQ_se$Chromosome))
snpQchr_se<-rename(snpQchr_se, Chromosome=Var1) #rename Var1 as Chromosome
snpQchr_se<-rename(snpQchr_se, count=Freq) #rename Freq as count
#assign levels to chromosomes
snpQchr_se$Chromosome<- ifelse(snpQchr_se$Chromosome=="1", 1,
ifelse(snpQchr_se$Chromosome=="2", 2,
ifelse(snpQchr_se$Chromosome=="3", 3,
ifelse(snpQchr_se$Chromosome=="4", 4,
ifelse(snpQchr_se$Chromosome=="5", 5,
ifelse(snpQchr_se$Chromosome=="6", 6,
ifelse(snpQchr_se$Chromosome=="7", 7,
ifelse(snpQchr_se$Chromosome=="8", 8,
ifelse(snpQchr_se$Chromosome=="9", 9,
ifelse(snpQchr_se$Chromosome=="10", 10,
ifelse(snpQchr_se$Chromosome=="11", 11,
ifelse(snpQchr_se$Chromosome=="12", 12,
ifelse(snpQchr_se$Chromosome=="13", 13,
ifelse(snpQchr_se$Chromosome=="14", 14,
ifelse(snpQchr_se$Chromosome=="15", 15,
ifelse(snpQchr_se$Chromosome=="16", 16,
ifelse(snpQchr_se$Chromosome=="17", 17,
ifelse(snpQchr_se$Chromosome=="18", 18,
ifelse(snpQchr_se$Chromosome=="19", 19,
ifelse(snpQchr_se$Chromosome=="20", 20,
ifelse(snpQchr_se$Chromosome=="21", 21,
ifelse(snpQchr_se$Chromosome=="22", 22,
ifelse(snpQchr_se$Chromosome=="X", 23,NA)))))))))))))))))))))))
snpQchr_se<-snpQchr_se[order(snpQchr_se$Chromosome),] #change chromosome to ascending order
#snprs to chromosome distribution for "h"
snpQchr_h<-as.data.frame(table(snpQ_h$Chromosome))
snpQchr_h<-rename(snpQchr_h, Chromosome=Var1) #rename Var1 as Chromosome
snpQchr_h<-rename(snpQchr_h, count=Freq) #rename Freq as count
#assign levels to chromosomes
snpQchr_h$Chromosome<- ifelse(snpQchr_h$Chromosome=="1", 1,
ifelse(snpQchr_h$Chromosome=="2", 2,
ifelse(snpQchr_h$Chromosome=="3", 3,
ifelse(snpQchr_h$Chromosome=="4", 4,
ifelse(snpQchr_h$Chromosome=="5", 5,
ifelse(snpQchr_h$Chromosome=="6", 6,
ifelse(snpQchr_h$Chromosome=="7", 7,
ifelse(snpQchr_h$Chromosome=="8", 8,
ifelse(snpQchr_h$Chromosome=="9", 9,
ifelse(snpQchr_h$Chromosome=="10", 10,
ifelse(snpQchr_h$Chromosome=="11", 11,
ifelse(snpQchr_h$Chromosome=="12", 12,
ifelse(snpQchr_h$Chromosome=="13", 13,
ifelse(snpQchr_h$Chromosome=="14", 14,
ifelse(snpQchr_h$Chromosome=="15", 15,
ifelse(snpQchr_h$Chromosome=="16", 16,
ifelse(snpQchr_h$Chromosome=="17", 17,
ifelse(snpQchr_h$Chromosome=="18", 18,
ifelse(snpQchr_h$Chromosome=="19", 19,
ifelse(snpQchr_h$Chromosome=="20", 20,
ifelse(snpQchr_h$Chromosome=="21", 21,
ifelse(snpQchr_h$Chromosome=="22", 22,
ifelse(snpQchr_h$Chromosome=="X", 23,NA)))))))))))))))))))))))
snpQchr_h<-snpQchr_h[order(snpQchr_h$Chromosome),] #change chromosome to ascending order
#snprs to chromosome distribution for "sne"
snpQchr_sne<-as.data.frame(table(snpQ_sne$Chromosome))
snpQchr_sne<-rename(snpQchr_sne, Chromosome=Var1) #rename Var1 as Chromosome
snpQchr_sne<-rename(snpQchr_sne, count=Freq) #rename Freq as count
#assign levels to chromosomes
snpQchr_sne$Chromosome<- ifelse(snpQchr_sne$Chromosome=="1", 1,
ifelse(snpQchr_sne$Chromosome=="2", 2,
ifelse(snpQchr_sne$Chromosome=="3", 3,
ifelse(snpQchr_sne$Chromosome=="4", 4,
ifelse(snpQchr_sne$Chromosome=="5", 5,
ifelse(snpQchr_sne$Chromosome=="6", 6,
ifelse(snpQchr_sne$Chromosome=="7", 7,
ifelse(snpQchr_sne$Chromosome=="8", 8,
ifelse(snpQchr_sne$Chromosome=="9", 9,
ifelse(snpQchr_sne$Chromosome=="10", 10,
ifelse(snpQchr_sne$Chromosome=="11", 11,
ifelse(snpQchr_sne$Chromosome=="12", 12,
ifelse(snpQchr_sne$Chromosome=="13", 13,
ifelse(snpQchr_sne$Chromosome=="14", 14,
ifelse(snpQchr_sne$Chromosome=="15", 15,
ifelse(snpQchr_sne$Chromosome=="16", 16,
ifelse(snpQchr_sne$Chromosome=="17", 17,
ifelse(snpQchr_sne$Chromosome=="18", 18,
ifelse(snpQchr_sne$Chromosome=="19", 19,
ifelse(snpQchr_sne$Chromosome=="20", 20,
ifelse(snpQchr_sne$Chromosome=="21", 21,
ifelse(snpQchr_sne$Chromosome=="22", 22,
ifelse(snpQchr_sne$Chromosome=="X", 23,NA)))))))))))))))))))))))
snpQchr_sne<-snpQchr_sne[order(snpQchr_sne$Chromosome),] #change chromosome to ascending order
#snprs to chromosome distribution for "eh"
snpQchr_eh<-as.data.frame(table(snpQ_eh$Chromosome))
snpQchr_eh<-rename(snpQchr_eh, Chromosome=Var1) #rename Var1 as Chromosome
snpQchr_eh<-rename(snpQchr_eh, count=Freq) #rename Freq as count
#assign levels to chromosomes
snpQchr_eh$Chromosome<- ifelse(snpQchr_eh$Chromosome=="1", 1,
ifelse(snpQchr_eh$Chromosome=="2", 2,
ifelse(snpQchr_eh$Chromosome=="3", 3,
ifelse(snpQchr_eh$Chromosome=="4", 4,
ifelse(snpQchr_eh$Chromosome=="5", 5,
ifelse(snpQchr_eh$Chromosome=="6", 6,
ifelse(snpQchr_eh$Chromosome=="7", 7,
ifelse(snpQchr_eh$Chromosome=="8", 8,
ifelse(snpQchr_eh$Chromosome=="9", 9,
ifelse(snpQchr_eh$Chromosome=="10", 10,
ifelse(snpQchr_eh$Chromosome=="11", 11,
ifelse(snpQchr_eh$Chromosome=="12", 12,
ifelse(snpQchr_eh$Chromosome=="13", 13,
ifelse(snpQchr_eh$Chromosome=="14", 14,
ifelse(snpQchr_eh$Chromosome=="15", 15,
ifelse(snpQchr_eh$Chromosome=="16", 16,
ifelse(snpQchr_eh$Chromosome=="17", 17,
ifelse(snpQchr_eh$Chromosome=="18", 18,
ifelse(snpQchr_eh$Chromosome=="19", 19,
ifelse(snpQchr_eh$Chromosome=="20", 20,
ifelse(snpQchr_eh$Chromosome=="21", 21,
ifelse(snpQchr_eh$Chromosome=="22", 22,
ifelse(snpQchr_eh$Chromosome=="X", 23,NA)))))))))))))))))))))))
snpQchr_eh<-snpQchr_eh[order(snpQchr_eh$Chromosome),] #change chromosome to ascending order
#create table for snprs for each chromosome for each population
snpQchromPtabe<-data.frame(vars= c("Total number of genes = 496", "**Overall SNP count** _(n_", "**Chromosome** _(n_", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y"), #create df with snprs and chromosome data for each ethnic group, and total number of genes
European= c(s, snpQtable$Unique_SNPrs_summary[1], s, snpQchr_e$count[1:23], s),
African_AfricanAmerican= c(s, snpQtable$Unique_SNPrs_summary[2], s, snpQchr_fm$count[1:20], s, snpQchr_fm$count[21], s,s),
African_European_AfricanAmerican_IndianAmerican= c(s, snpQtable$Unique_SNPrs_summary[3], s, s, snpQchr_femi$count[1:4], s, s, s, snpQchr_femi$count[5:7], s, s, s, s, s, snpQchr_femi$count[8], s, snpQchr_femi$count[9], s, s, s, s, s),
EastAsian_European= c(s, snpQtable$Unique_SNPrs_summary[4], s, s, s, s, s, s, s, snpQchr_se$count[1], s, s, s, s, snpQchr_se$count[2], s, s, s, s, s, snpQchr_se$count[3], s, s, s, s, s, s),
Hispanic= c(s, snpQtable$Unique_SNPrs_summary[5], s, s, s, snpQchr_h$count[1], s, snpQchr_h$count[2], s, s, s, s, s, s, s, s, s, s, s, s, s, snpQchr_h$count[3], s, s, s, s, s),
EastAsian_Asian_European= c(s, snpQtable$Unique_SNPrs_summary[6], s, s, s, s, s, s, s, s, s, s, s, s, s, s, s, s, s, s, s, snpQchr_sne$count[1], s, s, s, s, s),
European_Hispanic= c(s,snpQtable$Unique_SNPrs_summary[7], s, s, s, s, s, s, s, s, s, s, snpQchr_eh$count[1], s, s, s, s, s, s, s, s, s, s, s, s, s, s),
stringsAsFactors = FALSE)
#create summary table of SNPrs to chromosome for each ethnic group, and total number of gene associations
kable(snpQchromPtabe,
col.names = c("", "European", "African|African American", "African|European|African American|Indian American", "East Asian|European", "Hispanic", "East Asian|Asian|European", "European|Hispanic"),
align="cccccccc",
type="") %>%
column_spec(1, width_min = "5cm", border_right = TRUE) %>%
column_spec(c(2,3,4,5,6,7), width_min = "2cm") %>%
row_spec(0, bold = T, color="ivory", background = "gray") %>%
row_spec(1:27, color="black", background = "white") %>%
kable_styling(bootstrap_options = ("bordered"), full_width=FALSE, font_size=12)
#most statistically significant and unique SNP rs for each ethnic group - table 7
#most significantly unique SNP for each ethnic group
snpQmost<-bind_rows(snpQ_e[1, ], snpQ_fm[1, ], snpQ_femi[1, ], snpQ_se[1, ], snpQ_h[1, ], snpQ_sne[1, ], snpQ_eh[1, ]) #gather most significant unmatched unique snprs
c2<-c(3:6, 9:11, 17) #select columns to keep
snpQmost<-snpQmost[,c2] #run columns to keep
snpQmost$`P-Value`[1]<-"1.999999999999999886549084e-157" #assign pv
snpQmost$`P-Value`[2]<-"2.000000000000000124466409e-9"
snpQmost$`P-Value`[3]<-"9.000000000000000069388939e-52"
snpQmost$`P-Value`[4]<-"9.999999999999999547237172e-7"
snpQmost$`P-Value`[5]<-"7.999999999999999516012150e-11"
snpQmost$`P-Value`[6]<-"4.999999999999999808746736e-39"
snpQmost$`P-Value`[7]<-"1.999999999999999909447434e-7"
snpQmost$Population <- factor(snpQmost$Population, #factor labels for levels
levels=c(0,1,2,3,4,5,6),
labels=c("African|African American", "African|European|African American|Indian American", "East Asian|Asian|European", "East Asian|European", "European", "European|Hispanic", "Hispanic"))
#create significantly most unique SNP rs table
snpQmost %>%
kbl(caption = "_Summary of the most statistically significant and unique SNP rs for each ethnic group associated with Alzheimer's disease_",
align = "lcccclll") %>% #table title
column_spec(c(1,2,3,4),width_min = "2.5cm") %>%
column_spec(c(5,6,7,8),width_min = "3cm") %>%
kable_classic(full_width=FALSE, html_font = "Calibri") %>% #output width and font
kable_styling(table.envir = "ctable", font_size=16) %>% #output fontSize
row_spec(0, bold=T, color="white", background = "gray") #output heading and body layout
#export unmatched unique snp rs data to "output" folder
#all ethnic groups
write.table(snpQ, file="C:/Users/ladki/Desktop/AD_Project/output\\uniqueSNP_allEthnicGroups.csv", row.names=FALSE, sep=",")
#Ethnic group - European
write.table(snpQ_e, file="C:/Users/ladki/Desktop/AD_Project/output\\uniqueSNP_european.csv", row.names=FALSE, sep=",")
#Ethnic group - American|African American
write.table(snpQ_fm, file="C:/Users/ladki/Desktop/AD_Project/output\\uniqueSNP_african_africanAmerican.csv", row.names=FALSE, sep=",")
#Ethnic group - African|European|African American|Indian American
write.table(snpQ_femi, file="C:/Users/ladki/Desktop/AD_Project/output\\uniqueSNP_african_european_africanAmerican_indianAmerican.csv", row.names=FALSE, sep=",")
#Ethnic group - East Asian|European
write.table(snpQ_se, file="C:/Users/ladki/Desktop/AD_Project/output\\uniqueSNP_eastAsian_european.csv", row.names=FALSE, sep=",")
#Ethnic group - Hispanic
write.table(snpQ_h, file="C:/Users/ladki/Desktop/AD_Project/output\\uniqueSNP_hispanic.csv", row.names=FALSE, sep=",")
#Ethnic group - East Asian|Asian|European
write.table(snpQ_sne, file="C:/Users/ladki/Desktop/AD_Project/output\\uniqueSNP_eastAsian_asian_european.csv", row.names=FALSE, sep=",")
#Ethnic group - European|Hispanic
write.table(snpQ_eh, file="C:/Users/ladki/Desktop/AD_Project/output\\uniqueSNP_european_hispanic.csv", row.names=FALSE, sep=",")
#pathway analysis of the unique SNP rs in association with genes - figure 4
knitr::include_graphics("C:/Users/ladki/Desktop/AD_Project/images/GO.png")
#pathway analysis of the unique SNP rs in association with genes - table 8
GO_AD<-read_excel("./data/GO_AD_function.xlsx", sheet=1) #GO_AD_functions data
GO_AD$`P-value`<-as.character(GO_AD$`P-value`)
#create table for GO_AD_functions terms
GO_AD %>%
kbl(caption = "_GO terms enriched in the enrichment pathway analysis with genes associated with the unique SNP rs of ethnic groups associated with Alzheimer's disease_ ",
align = "llcl") %>% #table title
column_spec(c(1,2,3,4), width_min = "3cm") %>%
kable_classic(full_width=FALSE, html_font = "Calibri") %>% #output width and font
kable_styling(table.envir = "ctable", font_size=16) %>% #output fontSize
row_spec(0, bold=T, color="white", background = "gray") #output heading and body layout
#unique SNP rs of Alzheimer's disease in association with other phenotypes from the PheGenI GWAS catalogue, table 9
ad_o_ph<- read_excel("./data/PheGenI_otherPhenotype.xlsx", sheet=2) #other phenotype data
ad_o_ph<-rename(ad_o_ph, Phenotype=Phenotype...1) #rename column
ad_o_ph<-rename(ad_o_ph, SNPrs_count=SNPrs_count...2) #rename column
ad_o_ph<-rename(ad_o_ph, Phenotype_continued=Phenotype...3) #rename column
ad_o_ph<-rename(ad_o_ph, SNPrs_count_continued=SNPrs_count...4) #rename column
ad_o_ph<-as.data.frame(ad_o_ph) #save as df to change na
ad_o_ph[is.na(ad_o_ph)]<-s #change na to blank
#create table for other phenotypes
ad_o_ph %>%
kbl(caption = "_Significant other phenotypes in association with the unique SNP rs for Alzheimer's disease_ ",
align = "rlrl") %>% #table title
column_spec(c(1,2,3,4), width_min = "3cm") %>%
kable_classic(full_width=FALSE, html_font = "Calibri") %>% #output width and font
kable_styling(table.envir = "ctable", font_size=16) %>% #output fontSize
row_spec(0, bold=T, color="white", background = "gray") #output heading and body layout