OVERVIEW

The chosen phenotype is Type 2 Diabetes Mellitus, a complex, polygenic disease. We will apply machine learning methods to known pleiotropic SNPs to produce a dendrogram, Euclidean and Manhattan distance analyses, and a PCA.

DATA PREP

All the cleaning steps

VISUALISATION

Final figures

library(adegenet)
## Warning: package 'adegenet' was built under R version 3.6.3
## Loading required package: ade4
## Warning: package 'ade4' was built under R version 3.6.3
## Registered S3 method overwritten by 'spdep':
##   method   from
##   plot.mst ape
## 
##    /// adegenet 2.1.3 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature requests: adegenetIssues()
library(ade4)
library(adegraphics)
## Warning: package 'adegraphics' was built under R version 3.6.3
## Registered S3 methods overwritten by 'adegraphics':
##   method         from
##   biplot.dudi    ade4
##   kplot.foucart  ade4
##   kplot.mcoa     ade4
##   kplot.mfa      ade4
##   kplot.pta      ade4
##   kplot.sepan    ade4
##   kplot.statis   ade4
##   scatter.coa    ade4
##   scatter.dudi   ade4
##   scatter.nipals ade4
##   scatter.pco    ade4
##   score.acm      ade4
##   score.mix      ade4
##   score.pca      ade4
##   screeplot.dudi ade4
## 
## Attaching package: 'adegraphics'
## The following objects are masked from 'package:ade4':
## 
##     kplotsepan.coa, s.arrow, s.class, s.corcircle, s.distri, s.image,
##     s.label, s.logo, s.match, s.traject, s.value, table.value,
##     triangle.class
library(glmnet)
## Warning: package 'glmnet' was built under R version 3.6.3
## Loading required package: Matrix
## Loaded glmnet 4.1-1
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 3.6.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.6.3
library(ggdendro)
## Warning: package 'ggdendro' was built under R version 3.6.3
library(dendextend)
## Warning: package 'dendextend' was built under R version 3.6.3
## Registered S3 method overwritten by 'dendextend':
##   method     from 
##   rev.hclust vegan
## 
## ---------------------
## Welcome to dendextend version 1.14.0
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:ggdendro':
## 
##     theme_dendro
## The following object is masked from 'package:stats':
## 
##     cutree
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.6.3
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:dendextend':
## 
##     rotate
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble  3.0.6     v dplyr   1.0.4
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## v purrr   0.3.4
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'readr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'stringr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x tidyr::pack()   masks Matrix::pack()
## x tidyr::unpack() masks Matrix::unpack()
library(readr)

Import dataset

# Build the list of unique SNP identifiers reported in European populations.
#
# Restrict to European records *before* de-duplicating. De-duplicating first
# keeps only the first row per SNP, so any SNP whose first-listed association
# comes from another population (e.g. rs7903146, whose first row is
# "African|African American") was silently dropped from the European list.
# NOTE: the CSV listing captured below was produced before this fix; re-knit
# the document to refresh it.
PheGenI_Association2 <- PheGenI_Association %>%
  # filter() has no `na.rm` argument; rows where the comparison is NA are
  # already dropped, so the condition alone is sufficient.
  filter(Population == "European") %>%
  # Keep the first European record for each SNP, retaining all columns.
  distinct(SNP.rs, .keep_all = TRUE)

# SNP.rs holds bare integers; add the conventional "rs" prefix.
PheGenI_Association2$SNP.rs <- paste0("rs", PheGenI_Association2$SNP.rs)

# Plain character vector of rs identifiers.
PheGenI_Association3 <- PheGenI_Association2$SNP.rs

view(PheGenI_Association3)
# No file argument: the CSV is written to the console / rendered output.
write.csv(PheGenI_Association3)
## "","x"
## "1","rs7901695"
## "2","rs1552224"
## "3","rs10440833"
## "4","rs7578326"
## "5","rs8050136"
## "6","rs5015480"
## "7","rs243021"
## "8","rs1387153"
## "9","rs864745"
## "10","rs231362"
## "11","rs4457053"
## "12","rs4506565"
## "13","rs2943641"
## "14","rs6931514"
## "15","rs5219"
## "16","rs10965250"
## "17","rs12779790"
## "18","rs972283"
## "19","rs8042680"
## "20","rs7578597"
## "21","rs896854"
## "22","rs7961581"
## "23","rs11634397"
## "24","rs1531343"
## "25","rs4607103"
## "26","rs4689388"
## "27","rs10946398"
## "28","rs10911021"
## "29","rs7957197"
## "30","rs1801214"
## "31","rs7018475"
## "32","rs13292136"
## "33","rs11642841"
## "34","rs10923931"
## "35","rs7593730"
## "36","rs9300039"
## "37","rs17036101"
## "38","rs13081389"
## "39","rs17428041"
## "40","rs7020996"
## "41","rs1153188"
## "42","rs71647933"
## "43","rs9465871"
## "44","rs35260355"
## "45","rs113932007"
## "46","rs12518099"
## "47","rs9896052"
## "48","rs6986153"
## "49","rs564398"
## "50","rs10980508"
## "51","rs11615866"
## "52","rs17447640"
## "53","rs11298745"
## "54","rs34620785"
## "55","rs886374"
## "56","rs12714314"
## "57","rs58383906"
## "58","rs358806"
## "59","rs35372009"
## "60","rs10512488"
## "61","rs7412314"
## "62","rs11099942"
## "63","rs13166103"
## "64","rs9502478"
## "65","rs10973627"
## "66","rs76703216"
## "67","rs4760790"
## "68","rs11060464"
## "69","rs12743974"
## "70","rs17177078"
## "71","rs13043901"
## "72","rs615545"
## "73","rs1525739"
## "74","rs148077446"
## "75","rs17025978"
## "76","rs12304921"
## "77","rs1495377"
## "78","rs12736701"
## "79","rs34428389"
## "80","rs2107167"
## "81","rs1754680"
## "82","rs340841"
## "83","rs7659604"
## "84","rs9384193"
## "85","rs470089"
## "86","rs12255372"
## "87","rs27779"
## "88","rs10954654"
## "89","rs640742"
## "90","rs3825253"
## "91","rs3825569"
## "92","rs9870410"
## "93","rs4687299"
## "94","rs4767658"
## "95","rs1918416"
## "96","rs10501281"
## "97","rs11849174"
## "98","rs557962"
## "99","rs2338545"
## "100","rs1287526"
## "101","rs12273344"
## "102","rs823968"
## "103","rs7839244"
## "104","rs1449720"
## "105","rs565979"
## "106","rs1932397"
## "107","rs739984"
## "108","rs9545903"
## "109","rs7207345"
## "110","rs1852027"
## "111","rs528957"
## "112","rs13139219"
## "113","rs2673776"
## "114","rs2501354"
## "115","rs13072106"
## "116","rs2470984"
## "117","rs616444"
## "118","rs9286938"
## "119","rs1030231"
## "120","rs11714343"
# Load the raw PheGenI association export (tab-delimited).
# NOTE(review): this path is specific to the author's home directory; the
# script will not run elsewhere without adjusting it.
PheGenI_Association4 <- read.delim(
  "~/ECU/Clinical Bioinformatics/Lecture_3_R_markdown/PheGenI_Association (2).tab"
)

# Open the spreadsheet viewer only in an interactive session, so the document
# can still be knitted or run in batch mode.
if (interactive()) {
  View(PheGenI_Association)
}

# Keep European records only. filter() has no `na.rm` argument (it was being
# treated as an extra, always-TRUE condition); rows where the comparison is NA
# are dropped by filter() anyway.
PheGenI_Association5 <- PheGenI_Association4 %>%
  filter(Population == "European")

#PheGenI_Association6 <- subset(PheGenI_Association4, P.Value < 1e-5 & Population == 'European' & Gene!=Gene.2)
# Print the column types of the association table.
str(PheGenI_Association)
## 'data.frame':    542 obs. of  17 variables:
##  $ X.         : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Trait      : Factor w/ 1 level "Diabetes Mellitus, Type 2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ SNP.rs     : int  7903146 7903146 7903146 2237896 34872471 7903146 7901695 7903146 2237892 7903146 ...
##  $ Context    : Factor w/ 8 levels "cds-synon","intergenic",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Gene       : Factor w/ 298 levels "ABHD17AP4","ABRA",..: 261 261 261 106 261 261 261 261 106 261 ...
##  $ Gene.ID    : int  6934 6934 6934 3784 6934 6934 6934 6934 3784 6934 ...
##  $ Gene.2     : Factor w/ 296 levels "ACHE","ACMSD",..: 260 260 260 113 260 260 260 260 113 260 ...
##  $ Gene.ID.2  : int  6934 6934 6934 3784 6934 6934 6934 6934 3784 6934 ...
##  $ Chromosome : Factor w/ 23 levels "1","10","11",..: 2 2 2 3 2 2 2 2 3 2 ...
##  $ Location   : int  112998590 112998590 112998590 2837210 112994312 112998590 112994329 112998590 2818521 112998590 ...
##  $ P.Value    : num  4e-94 8e-75 9e-75 3e-70 6e-53 ...
##  $ Source     : Factor w/ 2 levels "dbGaP","NHGRI": 2 2 2 2 2 2 2 2 2 2 ...
##  $ PubMed     : int  25102180 24509480 23300278 26818947 27189021 20581827 17463249 25102180 18711367 22693455 ...
##  $ Analysis.ID: int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Study.ID   : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Study.Name : Factor w/ 2 levels "","The Finland-United States Investigation of NIDDM Genetics (FUSION) - GWAS Study": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Population : Factor w/ 20 levels "African|African American",..: 1 17 16 8 6 9 9 1 8 12 ...
# Collapse Population into a two-level factor: "European" versus "Other".
# Missing Population values remain NA.
is_european <- PheGenI_Association$Population == "European"
PheGenI_Association$Population <- factor(
  is_european,
  levels = c(TRUE, FALSE),
  labels = c("European", "Other")
)

# Remove the SNP identifier column from the European subset.
PheGenI_Association2[["SNP.rs"]] <- NULL