This is the data analysis for the TurboID experiment I carried out, using SWATH-MS to quantify proteins interacting with the ASXL1 PHD domain. First, we load the packages required for the analysis.
library(gridExtra)
library(readxl)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.4 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::combine() masks gridExtra::combine()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(ggrepel)
library(gghighlight)
# Session setup: lift ggrepel's overlap limit and resolve the input
# workbooks relative to the analysis folder.
options(ggrepel.max.overlaps = Inf)
setwd("~/Desktop/SWATH_R_Analysis")

# Import the t-test workbook; the raw import is kept untouched and all
# further edits are made on a working copy.
Raw_SWATHMSData <- read_excel("A1P13 vs TID all t-test.xlsx")
SWATHMSData <- Raw_SWATHMSData
# The 18th column is relabelled by position.
names(SWATHMSData)[18] <- "logFC"
## Volcano Plot of SWATH-MS Data

A volcano plot highlighting the significant proteins (Log2FC greater than 0.5 and adjusted p-value of 0.05 or less) was generated as follows:
# Adjust the raw p-values for multiple testing ("fdr" is p.adjust()'s alias
# for the Benjamini-Hochberg method) and put the fold change on a log2 scale.
SWATHMSData$Adj_Pval <- p.adjust(SWATHMSData$`p-value`, method = "fdr")
SWATHMSData$Log2FC <- log2(SWATHMSData$`Fold Change`)

# Attach the UniProt annotation. merge() performs an inner join, so only
# peaks present in both tables are kept.
UniProt <- read_excel("UniProtIDCompiled.xlsx")
SWATHMerge <- merge(SWATHMSData, UniProt, by = "Peak Name")
# NOTE(review): column 23 is renamed by position; presumably it is the gene
# symbol column from the UniProt sheet - confirm if either input changes.
colnames(SWATHMerge)[23] <- "GeneName"

# Significant hits: Log2FC > 0.5 and adjusted p-value <= 0.05.
# which() drops rows where the comparison is NA; a bare logical index would
# instead return all-NA rows and add NA to Sig_Names.
Prot_Sig <- SWATHMerge[
  which(SWATHMerge$Log2FC > 0.5 & SWATHMerge$Adj_Pval <= 0.05),
]
Sig_Names <- Prot_Sig$GeneName

# Volcano plot: every protein is drawn, the significant hits are highlighted
# and labelled with their gene name.
ggplot() +
  geom_point(
    data = SWATHMerge,
    aes(x = Log2FC, y = -log10(Adj_Pval)),
    size = 1.5,
    colour = "red"
  ) +
  gghighlight(
    GeneName %in% Sig_Names,
    unhighlighted_params = aes(colour = "lightblue"),
    use_group_by = FALSE
  ) +
  geom_text_repel(
    aes(
      x = Log2FC,
      y = -log10(Adj_Pval),
      label = ifelse(GeneName %in% Sig_Names, GeneName, "")
    )
  )
## CRAPome Comparison to Significant Protein List

I want to compare the proteins in my ‘hit list’ to their respective CRAPome scores. The CRAPome is a tool used to see whether proteins are typically detected in unrelated MS experiments – if the CRAPome scores and my enrichments align well, the identified proteins may simply be non-specific background.
# Annotate the significant hits with the GO summary sheet.
GOSummary <- read_excel("GO_Summary.xlsx")
Prot_SigMerge <- merge(Prot_Sig, GOSummary, by = "Peak Name")

# Load the CRAPome scores. View() needs an interactive session, so it is
# guarded to keep the script runnable in batch / when knitting.
Crapome <- read_excel("Crapome score.xlsx")
if (interactive()) {
  View(Crapome)
}

# Keep only gene name and fold change, then join to the CRAPome table on
# gene name (the first CRAPome column is relabelled to provide the key).
CrapomeFC <- Prot_SigMerge %>% select("GeneName", "Log2FC")
colnames(Crapome)[1] <- "GeneName"
Prot_SigCrap <- merge(CrapomeFC, Crapome, by = "GeneName")
# NOTE(review): column 3 is renamed by position; presumably it is the score
# column of the CRAPome sheet - confirm against the workbook.
colnames(Prot_SigCrap)[3] <- "CrapomeScore"

# Store the numeric coercion (the result was previously discarded) and
# still print the values.
Prot_SigCrap$Log2FC <- as.numeric(as.character(Prot_SigCrap$Log2FC))
print(Prot_SigCrap$Log2FC)
## [1] 0.8030306 1.7020889 2.1946314 0.5456976 0.8139482 1.3045248 4.1406811
## [8] 0.5793607 1.2950295 0.6797097 0.5851823 1.6184848 1.7640971 1.3185536
## [15] 1.0171161 0.6043471 0.5321922 0.5577577 0.7351181 0.9609650 0.5403184
## [22] 0.5465662 0.6188866 1.0422610 0.6142189 0.6491248 1.3067552 0.8875426
## [29] 0.7087546 0.5136300 0.6088305 1.3832653 1.3878896 1.3940634 0.8760706
## [36] 0.5014575 0.7272265 0.6379759 0.7188872 2.2748037 0.6301164 0.5238407
## [43] 0.7028585 1.1495263 0.5411471 0.6627587 1.4332280 0.6507403 0.5302902
## [50] 0.5434205 1.6139713 2.3427699 0.5545139 0.9504601 0.5047540 0.6357323
## [57] 1.5448411 1.0919703
# Store the numeric coercion of the CRAPome score (the result was previously
# discarded, leaving the column type unchanged) and still print the values.
Prot_SigCrap$CrapomeScore <- as.numeric(
  as.character(Prot_SigCrap$CrapomeScore)
)
print(Prot_SigCrap$CrapomeScore)
## [1] 0.206703911 0.227653631 0.255586592 0.276536313 0.025139665 0.002793296
## [7] 0.001396648 0.009776536 0.009776536 0.444134078 0.324022346 0.006983240
## [13] 0.048882682 0.078212291 0.284916201 0.113128492 0.709497207 0.301675978
## [19] 0.125698324 0.110335196 0.032122905 0.241620112 0.085195531 0.086592179
## [25] 0.013966480 0.529329609 0.044692737 0.002793296 0.206703911 0.020949721
## [31] 0.120111732 0.008379888 0.766759777 0.378491620 0.163407821 0.624301676
## [37] 0.600558659 0.544692737 0.493016760 0.667597765 0.205307263 0.353351955
## [43] 0.234636872 0.061452514 0.335195531 0.061452514 0.354748603 0.224860335
## [49] 0.650837989 0.303072626 0.001396648 0.638268156 0.094972067 0.159217877
## [55] 0.118715084 0.353351955 0.030726257 0.030726257
# Scatter of log2 fold change against CRAPome score for the significant
# hits, each point labelled with its gene name.
Prot_SigCrapNames <- Prot_SigCrap$GeneName
ggplot(Prot_SigCrap, aes(x = Log2FC, y = CrapomeScore)) +
  geom_point(color = "red", size = 1.5) +
  geom_text_repel(
    # x and y are inherited from the plot-level mapping.
    aes(label = ifelse(GeneName %in% Prot_SigCrapNames, GeneName, ""))
  )
There appears to be no correlation between the Crapome score and the protein log2 fold change, which is promising.
## Comparison to MCF7 and HCC1419 TRIB1 RIME Analysis

Peter suggested that I compare my protein hits to Hamish’s for his different cell lines. Hamish sent me a list of significantly enriched proteins from each of his cell lines, filtered for nuclear proteins and for proteins with a log2FC greater than 2 and a p-value of less than 0.05. I generated a graph of the MCF7 cells compared to all of my proteins, as follows:
# Load the MCF7 list and relabel its first column so it can be joined to the
# SWATH tables on UniProt ID.
MCF7Overlap <- read_excel("MCF7_Nuclear_For_Cam.xlsx")
colnames(MCF7Overlap)[1] <- "UniProtID"

# Overlap with every quantified protein and with the significant hits only.
MCFOverlapAll <- merge(SWATHMerge, MCF7Overlap, by = "UniProtID")
MCFOverlapSig <- merge(Prot_SigMerge, MCF7Overlap, by = "UniProtID")
MCFOverlapAllName <- MCFOverlapAll$GeneName
# NOTE(review): column 24 is renamed by position; verify it is the intended
# gene-name column after the merge.
colnames(MCFOverlapSig)[24] <- "GeneName"
MCFOverlapSigName <- MCFOverlapSig$GeneName

# Store the numeric coercion (the result was previously discarded) and
# still print the values.
MCFOverlapAll$Log2FC <- as.numeric(as.character(MCFOverlapAll$Log2FC))
print(MCFOverlapAll$Log2FC)
## [1] 0.174637292 -0.474591568 0.404725166 0.005331378 0.113036650
## [6] -0.714734510 -0.004393944 -0.305974715 -0.526246431 0.324694237
## [11] -0.051744511 0.412514530 -0.183238657 0.045945450 1.333517521
## [16] 0.008818077 0.763128402 0.315871761 -0.714937129 0.180183371
## [21] 0.008407275 -0.090797058 0.837824029 -0.149975848 -0.631515210
## [26] -0.023647229 0.141837272 -0.526764028 -0.249495387 0.053014355
## [31] 0.061617563 -0.114590944 0.005436742 0.179659431 0.716025597
## [36] -0.576742209 0.262886666 0.193971728 0.154348648 0.020764269
## [41] -0.675956248 -0.035820458 -0.034596818 -0.628057361 0.076221144
## [46] 0.062617946 0.071212987 0.071212987 -0.351780382 -0.112473825
## [51] -0.269728741 -0.184449332 0.252840582 -0.307386663 0.068881592
## [56] -0.191967064 -0.210087566 0.134904630 -0.754045782 1.702088880
## [61] 0.114673496 0.609591141 -0.145206256 -0.093022408 0.176833421
## [66] 0.039888177 0.608830480 0.155583011 0.050389101 0.097320517
## [71] 0.198729065 -0.368435878 -0.275789602 -0.073293937 0.047982755
## [76] 0.046170824 0.067275997 0.242327829 -0.295653884 -0.212499775
## [81] 0.876644370 1.386261102 -0.064420278 -0.415315871 0.577163190
## [86] -0.057280885 0.360667462 -0.388721365 -0.716034979 0.408204429
## [91] 0.014547636 0.081804535 1.258748571 0.330732861 -0.339031322
## [96] 0.025965874 -0.007083889 0.203896263 0.366953605 -0.040613614
## [101] 0.016196779 0.178695796 -0.620552085 0.073974070 0.128041536
## [106] 0.080293964 -0.930433230 0.876070645 0.041695526 0.410765735
## [111] -0.131237133 0.194577167 0.372209042 0.067706838 0.137196596
## [116] 0.371926687 -0.255943161 1.613971341 -0.106145696 0.044747812
## [121] -0.218675723 0.120742375 0.038518927 0.098741378 -1.333580813
## [126] -0.093362391 0.056903494 -0.132398546 0.110281260 0.037493101
## [131] -0.370541358 0.154377684 -0.031262911 0.317532794 -0.355095885
## [136] 0.486700503 -0.283656792 0.079333704 0.125773536 0.045560882
## [141] 0.318378747 -0.534371107 -1.656492212 -0.084862502 -0.038304706
## [146] 0.021333530 -0.057388864 0.384627315 0.389036548 0.121958835
## [151] -0.238254664 -0.097892927 -0.501222502 -0.973900740 -0.483595611
## [156] -0.306472170 -0.237507914 0.613201025 0.147481684
# Store the numeric coercion of the MCF7 fold change (the result was
# previously discarded) and still print the values.
MCFOverlapAll$log2FCMCF7 <- as.numeric(
  as.character(MCFOverlapAll$log2FCMCF7)
)
print(MCFOverlapAll$log2FCMCF7)
## [1] 2.23 2.01 2.46 2.04 2.06 2.33 2.03 2.05 2.58 2.43 2.23 2.45 2.45 2.18 2.70
## [16] 2.01 3.54 2.95 2.55 2.20 2.29 2.89 2.01 2.17 2.07 2.75 2.05 2.04 2.39 2.73
## [31] 2.73 3.15 2.44 2.87 2.03 2.73 2.17 2.10 2.13 2.12 2.50 2.24 2.51 2.43 2.24
## [46] 2.30 2.54 2.54 2.23 4.85 2.30 2.89 2.12 2.27 2.08 2.04 2.48 3.13 2.16 2.67
## [61] 2.09 2.36 2.91 2.63 2.99 3.01 2.77 2.55 2.66 2.80 2.12 2.01 2.10 2.10 2.07
## [76] 2.07 2.11 2.94 2.07 2.32 2.16 2.06 2.77 2.02 2.42 2.04 2.02 2.01 2.28 2.01
## [91] 2.49 2.07 2.37 2.51 3.09 2.19 2.19 2.30 2.10 2.77 2.73 2.90 2.88 2.02 2.34
## [106] 2.32 2.27 2.46 2.07 2.15 3.08 3.08 2.42 2.06 2.51 2.64 2.15 2.46 2.18 2.30
## [121] 2.85 2.26 2.87 2.13 2.67 2.58 2.53 2.22 2.65 2.22 4.79 2.33 2.71 2.10 2.09
## [136] 2.05 2.01 2.09 2.31 4.81 2.04 2.04 2.30 2.17 2.01 2.82 2.56 2.53 2.53 2.11
## [151] 2.07 4.43 2.82 2.01 2.87 3.17 2.07 2.03 2.82
# TurboID log2 fold change against MCF7 log2 fold change for every protein
# found in both data sets, each point labelled with its gene name.
MCFAllPlot <- ggplot(MCFOverlapAll, aes(x = Log2FC, y = log2FCMCF7))
MCFAllPlot +
  geom_point(color = "red", size = 1.5) +
  geom_text_repel(
    # x and y are inherited from the plot-level mapping.
    aes(label = ifelse(GeneName %in% MCFOverlapAllName, GeneName, ""))
  )
I also made a separate graph comparing my significantly enriched proteins to Hamish’s list.
# Same comparison, with the significantly enriched TurboID proteins
# highlighted and labelled.
ggplot() +
  geom_point(
    data = MCFOverlapAll,
    aes(x = Log2FC, y = log2FCMCF7),
    size = 1.5,
    colour = "red"
  ) +
  gghighlight(
    GeneName %in% MCFOverlapSigName,
    unhighlighted_params = aes(colour = "lightblue"),
    use_group_by = FALSE
  ) +
  geom_text_repel(
    aes(
      x = Log2FC,
      y = log2FCMCF7,
      label = ifelse(GeneName %in% MCFOverlapSigName, GeneName, "")
    )
  )
Finally, this figure was adapted to show only the proteins with a Log2FC greater than 0 in the ASXL1 TurboID analysis, i.e. enriched proteins.
# Keep only overlap proteins with a positive TurboID log2 fold change
# (rows where the comparison is NA are dropped).
MCF7Sig <- MCFOverlapAll[which(MCFOverlapAll$Log2FC > 0), ]

# Plot the retained proteins, highlighting and labelling the significant hits.
ggplot() +
  geom_point(
    data = MCF7Sig,
    aes(x = Log2FC, y = log2FCMCF7),
    size = 1.5,
    colour = "red"
  ) +
  gghighlight(
    GeneName %in% MCFOverlapSigName,
    unhighlighted_params = aes(colour = "lightblue"),
    use_group_by = FALSE
  ) +
  geom_text_repel(
    aes(
      x = Log2FC,
      y = log2FCMCF7,
      label = ifelse(GeneName %in% MCFOverlapSigName, GeneName, "")
    )
  )
# Load the HCC1419 list and relabel its first column so it can be joined to
# the SWATH tables on UniProt ID.
HCC1419Overlap <- read_excel("HCC1419.xlsx")
colnames(HCC1419Overlap)[1] <- "UniProtID"

# Overlap with every quantified protein and with the significant hits only.
HCCOverlapAll <- merge(SWATHMerge, HCC1419Overlap, by = "UniProtID")
HCCOverlapSig <- merge(Prot_SigMerge, HCC1419Overlap, by = "UniProtID")
HCCOverlapAllName <- HCCOverlapAll$GeneName
# NOTE(review): the matching MCF7 step renames column 24 to "GeneName", but
# here it becomes "Role". Confirm this is intended and that the GeneName
# lookup on the next line still resolves to the gene-symbol column.
colnames(HCCOverlapSig)[24] <- "Role"
HCCOverlapSigName <- HCCOverlapSig$GeneName

# Store the numeric coercion (the result was previously discarded) and
# still print the values.
HCCOverlapAll$Log2FC <- as.numeric(as.character(HCCOverlapAll$Log2FC))
print(HCCOverlapAll$Log2FC)
## [1] 0.404725166 -0.221964092 0.016734882 0.051078633 0.037787073
## [6] 0.113036650 -0.226152845 -0.209290827 -0.305974715 -0.526246431
## [11] -0.217779189 0.658775688 -0.183238657 -0.208291947 1.333517521
## [16] -0.071475200 0.395124621 0.133341646 0.336747128 -0.211811552
## [21] 0.317480309 0.315871761 -0.059744761 0.019933077 0.054933823
## [26] -0.742956945 0.251008436 -0.142688557 -0.294500935 0.217019592
## [31] 0.009539927 -0.035722444 -0.067099901 0.008407275 -0.002068983
## [36] -0.036226908 -0.600543776 0.019467392 0.396642639 0.136757905
## [41] 0.191577558 -0.019531139 -0.132451044 0.340475603 -0.022411049
## [46] 0.177343152 -0.631515210 -0.023647229 -0.071986259 -0.526764028
## [51] -0.249495387 -0.298675952 0.061617563 0.053014355 0.085164012
## [56] 0.546566230 -0.114590944 -0.024632024 -0.135202241 -0.167255303
## [61] -0.086358340 0.011077903 -0.386660988 0.154348648 0.015421603
## [66] -0.089351885 -0.679168161 0.128147042 -0.212221158 -0.675956248
## [71] -0.034596818 -0.628057361 -0.177332894 -0.094313953 -0.434105942
## [76] 2.274803666 0.076221144 -0.007274094 -0.391531178 0.242590146
## [81] 0.085589855 -0.351780382 -0.112473825 -0.269728741 -0.041137916
## [86] -0.041137916 -0.184449332 0.252840582 -0.120677967 -0.191967064
## [91] -0.210087566 -0.754045782 -0.205366966 -0.535762460 0.609591141
## [96] 0.148176521 -0.145206256 0.039888177 0.530290162 0.050389101
## [101] 0.097320517 0.198729065 0.034230289 -0.368435878 -0.275789602
## [106] -0.373406791 0.121702198 0.046170824 0.047982755 0.067275997
## [111] -0.324626763 0.242327829 -0.295653884 -0.210183998 -0.251444057
## [116] 0.033572540 -0.064420278 0.152776929 0.094942247 -0.415315871
## [121] 0.408204429 0.093084239 1.258748571 0.330732861 0.541147077
## [126] 0.464766513 0.088180100 -0.620552085 -0.417701596 0.128041536
## [131] -0.086346469 0.127859758 0.460751264 0.702858544 0.410765735
## [136] -0.434266624 0.371926687 1.613971341 0.190906045 0.044512021
## [141] 0.098741378 -0.191848998 -0.093362391 0.056903494 -0.132398546
## [146] -0.219458487 0.110281260 0.114898444 0.079333704 0.125773536
## [151] 0.045560882 0.318378747 1.916431384 0.430020700 -0.196841780
## [156] 0.389036548 0.384627315 -0.104512454 0.329407547 -0.097892927
## [161] -0.501222502 0.147481684
# Store the numeric coercion of the HCC1419 fold change (the result was
# previously discarded) and still print the values. This is the lower-case
# log2FC column, distinct from the TurboID Log2FC column.
HCCOverlapAll$log2FC <- as.numeric(as.character(HCCOverlapAll$log2FC))
print(HCCOverlapAll$log2FC)
## [1] 2.80 2.51 2.54 2.59 2.17 2.42 2.72 2.72 2.09 2.73 3.06 2.22 2.59 2.83 2.37
## [16] 2.21 2.01 2.11 2.15 2.61 2.22 3.10 2.49 3.31 2.09 2.10 2.21 2.19 2.59 2.13
## [31] 3.16 2.28 2.35 2.43 2.48 2.41 2.56 2.98 2.19 2.30 2.06 2.41 2.17 2.75 2.42
## [46] 2.70 3.22 3.10 2.16 2.64 3.01 2.46 2.90 2.90 2.02 2.31 3.32 2.94 2.76 2.76
## [61] 2.03 2.30 2.04 2.85 3.13 2.56 2.40 2.11 2.03 3.26 2.20 2.88 2.36 2.12 2.49
## [76] 2.09 3.26 2.30 2.71 2.86 2.16 2.31 5.00 2.03 2.51 2.51 3.40 2.40 2.53 2.36
## [91] 2.94 2.45 2.51 2.39 3.34 2.79 2.08 3.14 3.06 2.77 3.08 2.07 3.29 2.80 2.70
## [106] 2.02 2.64 2.47 2.47 2.04 2.37 2.20 2.62 2.07 2.63 2.02 2.35 2.25 2.54 2.83
## [121] 2.04 2.31 3.64 2.62 2.67 2.59 3.09 2.31 2.38 2.84 2.51 2.77 2.07 2.98 2.94
## [136] 2.15 2.46 5.00 2.52 2.62 2.48 2.33 4.84 3.38 2.00 2.08 2.85 2.43 2.28 2.49
## [151] 4.55 3.21 2.69 2.05 2.20 4.57 4.57 2.66 2.02 4.05 3.16 2.22
# Keep only overlap proteins with a positive TurboID log2 fold change
# (rows where the comparison is NA are dropped).
HCCSig <- HCCOverlapAll[which(HCCOverlapAll$Log2FC > 0), ]

# TurboID Log2FC (x) against the HCC1419 log2FC (y); the two columns differ
# only by case. Significant TurboID hits are highlighted and labelled.
ggplot() +
  geom_point(
    data = HCCSig,
    aes(x = Log2FC, y = log2FC),
    size = 1.5,
    colour = "red"
  ) +
  gghighlight(
    GeneName %in% HCCOverlapSigName,
    unhighlighted_params = aes(colour = "lightblue"),
    use_group_by = FALSE
  ) +
  geom_text_repel(
    aes(
      x = Log2FC,
      y = log2FC,
      label = ifelse(GeneName %in% HCCOverlapSigName, GeneName, "")
    )
  )