RP lists
- List of hand-curated RP/MRP genes, specific to the large & small ribosomal subunits (RP-MRP.csv)
- List of ribosomal biogenesis genes obtained from GO searches; search term ‘ribosomal biogenesis’ (rib_biogenesis.csv)
- List of nucleolar biology genes; GO searches on ‘protein localization to nucleolus’ (GO:1902570), ‘nucleolus organization’ (GO:0007000), and ‘regulation of protein localization to nucleolus’ (GO:1904749) (nucleolus.csv)
- List of ribosomal biogenesis & nucleolar biology genes from the GO searches (bullets 2 & 3) (nuc-rib-bio.csv)
- List of the combined set of RP/MRP, ribosomal biogenesis, and nucleolar biology genes (bullets 1-3) (combo4-RP-NUC-ribBio.csv)
- List of RP genes from a GO search on ‘ribosomal protein’ (bullet 1 was hand-curated from this list) (rp_GO.csv)
- List of RP, ribosomal biogenesis, and nucleolar biology genes from GO searches (bullets 2, 3, & 6 – includes all of bullet 1) (biggest_list.csv)
# Work from the directory that the (commented-out) write.csv() calls export to.
setwd("/n/home04/cdadams/RP_lists")

# Hand-curated RP/MRP list (pseudogenes excluded); 150 genes when this was run.
rp_list_expanded <- read.csv(
  file = "/n/home04/cdadams/ld/ldsc/expanded_verified.csv"
)
dim(rp_list_expanded)
## [1] 150 1
rp_list_expanded
## all_RP
## 1 MRPL1
## 2 MRPL10
## 3 MRPL11
## 4 MRPL12
## 5 MRPL13
## 6 MRPL14
## 7 MRPL15
## 8 MRPL16
## 9 MRPL17
## 10 MRPL18
## 11 MRPL19
## 12 MRPL2
## 13 MRPL20
## 14 MRPL21
## 15 MRPL22
## 16 MRPL23
## 17 MRPL24
## 18 MRPL27
## 19 MRPL28
## 20 MRPL3
## 21 MRPL30
## 22 MRPL32
## 23 MRPL33
## 24 MRPL34
## 25 MRPL35
## 26 MRPL36
## 27 MRPL37
## 28 MRPL38
## 29 MRPL39
## 30 MRPL4
## 31 MRPL40
## 32 MRPL41
## 33 MRPL42
## 34 MRPL43
## 35 MRPL44
## 36 MRPL45
## 37 MRPL46
## 38 MRPL47
## 39 MRPL48
## 40 MRPL49
## 41 MRPL50
## 42 MRPL51
## 43 MRPL52
## 44 MRPL53
## 45 MRPL54
## 46 MRPL55
## 47 MRPL57
## 48 MRPL9
## 49 MRPS10
## 50 MRPS11
## 51 MRPS12
## 52 MRPS14
## 53 MRPS15
## 54 MRPS16
## 55 MRPS17
## 56 MRPS18A
## 57 MRPS18B
## 58 MRPS18C
## 59 MRPS2
## 60 MRPS21
## 61 MRPS22
## 62 MRPS23
## 63 MRPS24
## 64 MRPS25
## 65 MRPS26
## 66 MRPS27
## 67 MRPS28
## 68 MRPS30
## 69 MRPS31
## 70 MRPS33
## 71 MRPS34
## 72 MRPS35
## 73 MRPS36
## 74 MRPS5
## 75 MRPS6
## 76 MRPS7
## 77 MRPS9
## 78 RPL10
## 79 RPL10A
## 80 RPL11
## 81 RPL12
## 82 RPL13
## 83 RPL13A
## 84 RPL14
## 85 RPL15
## 86 RPL17
## 87 RPL18
## 88 RPL18A
## 89 RPL19
## 90 RPL21
## 91 RPL22
## 92 RPL23
## 93 RPL23A
## 94 RPL24
## 95 RPL26
## 96 RPL27
## 97 RPL27A
## 98 RPL28
## 99 RPL29
## 100 RPL3
## 101 RPL30
## 102 RPL31
## 103 RPL32
## 104 RPL34
## 105 RPL35
## 106 RPL35A
## 107 RPL36
## 108 RPL36A
## 109 RPL37
## 110 RPL37A
## 111 RPL38
## 112 RPL39
## 113 RPL3L
## 114 RPL4
## 115 RPL41
## 116 RPL5
## 117 RPL6
## 118 RPL7
## 119 RPL7A
## 120 RPL8
## 121 RPL9
## 122 RPS10
## 123 RPS11
## 124 RPS12
## 125 RPS13
## 126 RPS14
## 127 RPS15
## 128 RPS15A
## 129 RPS16
## 130 RPS17
## 131 RPS18
## 132 RPS19
## 133 RPS2
## 134 RPS20
## 135 RPS21
## 136 RPS23
## 137 RPS24
## 138 RPS25
## 139 RPS26
## 140 RPS27
## 141 RPS27A
## 142 RPS28
## 143 RPS29
## 144 RPS3
## 145 RPS3A
## 146 RPS5
## 147 RPS6
## 148 RPS7
## 149 RPS8
## 150 RPS9
#write.csv(rp_list_expanded, '/n/home04/cdadams/RP_lists/RP-MRP.csv')
# Genes returned by the GO search for "ribosomal biogenesis".
rib_biogenesis <- read.csv("/n/home04/cdadams/ld/ldsc/rib_biogenesis.csv")
# Coerce the gene column to character and drop the last character of every
# entry.
# NOTE(review): presumably the export leaves a trailing delimiter or
# whitespace on each symbol -- confirm against the raw CSV.
rib_biogenesis$gene <- local({
  symbols <- as.character(rib_biogenesis$gene)
  substr(symbols, 1, nchar(symbols) - 1)
})
#write.csv(rib_biogenesis, '/n/home04/cdadams/RP_lists/rib_biogenesis.csv')
# Genes returned by the GO searches for 'protein localization to nucleolus'
# (GO:1902570), 'nucleolus organization' (GO:0007000), and 'regulation of
# protein localization to nucleolus' (GO:1904749).
nucleolus <- read.csv("/n/home04/cdadams/ld/ldsc/nucleolus.csv")
# The symbols are read into a column named `gene.`. Address it by its exact
# name: `nucleolus$gene` only reaches it through `$` partial matching, which
# is silent on data frames and unavailable on tibbles.
if (!"gene." %in% names(nucleolus)) {
  stop("nucleolus.csv: expected a column named 'gene.'", call. = FALSE)
}
# Drop the last character of every entry and store the result as `gene`, the
# column name the other lists use.
# NOTE(review): presumably the export leaves a trailing delimiter or
# whitespace on each symbol -- confirm against the raw CSV.
nucleolus$gene <- local({
  symbols <- as.character(nucleolus[["gene."]])
  substr(symbols, 1, nchar(symbols) - 1)
})
# Remove the raw column so only `gene` remains for the rbind() that follows.
nucleolus[["gene."]] <- NULL
#write.csv(nucleolus, '/n/home04/cdadams/RP_lists/nucleolus.csv')
# Combination of ribosomal biogenesis and nucleolar biology from GO searches
combo <- rbind(rib_biogenesis, nucleolus)
# De-duplicate on the gene symbol, keeping every column of the retained rows.
# distinct() is called through the dplyr namespace so this step does not
# depend on dplyr/magrittr having been attached earlier in the session.
combo2 <- dplyr::distinct(combo, gene, .keep_all = TRUE)
#write.csv(combo2, '/n/home04/cdadams/RP_lists/nuc-rib-bio.csv')
# Set of RPs found in the ribosomal biogenesis and nucleolar biology from GO
# searches: 116
# Select the `gene` column with drop = FALSE so the result is always a
# one-column data frame named `gene`. The earlier approach let the subset
# collapse to a vector, rebuilt a data frame from it and renamed the column,
# which would have deleted `gene` if combo2 ever carried more than one column.
rp_in_nuc <- combo2[
  combo2$gene %in% rp_list_expanded$all_RP,
  "gene",
  drop = FALSE
]
# Renumber rows 1..n, as a freshly built data frame would be.
rownames(rp_in_nuc) <- NULL
# Create the gene variable for merging with combo2: copy all_RP into `gene`,
# then remove all_RP so the column names line up for rbind().
rp_list_expanded$gene <- rp_list_expanded$all_RP
rp_list_expanded$all_RP <- NULL
# Set of all RP/ribosomal biogenesis/nucleolar biology genes: 498
combo3 <- rbind(combo2, rp_list_expanded)
# De-duplicate on the gene symbol. distinct() is called through the dplyr
# namespace so this step does not depend on dplyr/magrittr being attached.
combo4 <- dplyr::distinct(combo3, gene, .keep_all = TRUE)
#write.csv(combo4, '/n/home04/cdadams/RP_lists/combo4-RP-NUC-ribBio.csv')
# Hand-curated RPs that are absent from rp_in_nuc. Interestingly, 34 cytosolic
# RPs would have been missed: the "ribosomal biogenesis" search term and the
# nucleolar GO terms did not capture them.
# `%notin%` is the negation of `%in%`.
`%notin%` <- function(x, table) !(x %in% table)
rps_not_in_GO_list <- rp_list_expanded[
  rp_list_expanded$gene %notin% rp_in_nuc$gene,
]
# Human genes returned by the GO search on "ribosomal protein".
# The file name is relative, so it is resolved against the current working
# directory; the file is tab-separated with a header row.
rp_GO <- read.table(
  "GO_search_ribosomal_protein",
  header = TRUE,
  sep = "\t"
)
head(rp_GO)
## gene
## 1 RPL13A
## 2 RPS5
## 3 RPL15
## 4 RPL39
## 5 RPL30
## 6 RPL8
dim(rp_GO)
## [1] 355 1
#write.csv(rp_GO, "/n/home04/cdadams/RP_lists/rp_GO.csv")
# Which hand-curated genes (rp_list_expanded) are not in rp_GO: all are there.
# Index the gene column directly so the result is always a vector and
# length() counts genes. Subsetting the data frame with `[rows, ]` only gave a
# vector because the frame has a single column; with more columns length()
# would have reported the number of columns instead.
rps_not_in_GO_list2 <- rp_list_expanded$gene[
  !(rp_list_expanded$gene %in% rp_GO$gene)
]
length(rps_not_in_GO_list2)
## [1] 0
# Genes that appear only in rp_GO, to check that the hand-curated list
# (rp_list_expanded) did not miss any RPs.
# Filter and sort the gene vector directly; the result is the same sorted
# vector as before, without the round-trip through a temporary data frame
# that depended on rp_GO having exactly one column.
unique_to_rp_GO <- rp_GO$gene[!(rp_GO$gene %in% rp_list_expanded$gene)]
unique_to_rp_GO <- unique_to_rp_GO[order(unique_to_rp_GO)]
unique_to_rp_GO
## [1] "A0A0U1RQV5" "ABCE1" "ABT1" "BMS1" "BOP1"
## [6] "BRIX1" "BYSL" "C12orf65" "CIRBP" "CPEB2"
## [11] "CPSF6" "CYLD" "DAP3" "DCAF13" "DDX18"
## [16] "DDX28" "DDX3X" "DHX29" "DHX30" "DHX33"
## [21] "DHX37" "DIEXF" "EBNA1BP2" "EIF1" "EIF1B"
## [26] "EIF2A" "EIF2D" "EIF4B" "EIF4H" "EIF6"
## [31] "EMG1" "ERAL1" "ERCC2" "EXOSC7" "EXOSC8"
## [36] "EXOSC9" "FAM207A" "FASTKD2" "FTSJ1" "FTSJ3"
## [41] "FXR2" "GRK1" "GRK2" "GRK3" "GRK4"
## [46] "GRK5" "GRK6" "GRK7" "GTF2H5" "GTF3A"
## [51] "GTPBP4" "HBA1" "HEATR1" "HEATR3" "HELB"
## [56] "IMP3" "IPO11" "IPO5" "ISG15" "KPNB1"
## [61] "KRI1" "LARP1" "LARP4" "LAS1L" "LOC102724159"
## [66] "LSG1" "LSM6" "LTN1" "LTO1" "LTV1"
## [71] "MAK16" "MALSU1" "MCTS1" "MDN1" "METTL17"
## [76] "MPV17L2" "MRPL58" "MRRF" "MRTO4" "MTERF4"
## [81] "MTIF2" "MTIF3" "NAT10" "NDUFAB1" "NEMF"
## [86] "NGDN" "NHP2" "NIFK" "NIP7" "NLE1"
## [91] "NMD3" "NME1" "NOB1" "NOC2L" "NOL10"
## [96] "NOL11" "NOL9" "NOM1" "NOP14" "NOP16"
## [101] "NOP2" "NOP53" "NOP9" "NPM1" "NSA2"
## [106] "NSUN3" "NSUN4" "NUP88" "NVL" "OLA1"
## [111] "PAK1IP1" "PES1" "PHF2" "PHF8" "PIM1"
## [116] "PPAN" "PRKDC" "PTCD3" "PWP2" "RACK1"
## [121] "RAN" "RBM3" "RCL1" "RIMKLA" "RIMKLB"
## [126] "RIOK1" "RIOK2" "RIOK3" "RIOX1" "RIOX2"
## [131] "RPF1" "RPF2" "RPL10L" "RPL13AP3" "RPL22L1"
## [136] "RPL26L1" "RPL36AL" "RPL37AP8" "RPL39L" "RPL39P5"
## [141] "RPL7L1" "RPLP0" "RPLP0P6" "RPLP1" "RPLP2"
## [146] "RPP40" "RPS10P5" "RPS26P11" "RPS27AP5" "RPS27L"
## [151] "RPS4X" "RPS4Y1" "RPS4Y2" "RPS6KA1" "RPS6KA2"
## [156] "RPS6KA3" "RPS6KA4" "RPS6KA5" "RPS6KA6" "RPS6KB1"
## [161] "RPS6KB2" "RPS6KC1" "RPS6KL1" "RPSA" "RRP1"
## [166] "RRP15" "RRP1B" "RRP36" "RRP7A" "RRP7BP"
## [171] "RRP8" "RRS1" "RSKR" "RSL1D1" "RSL24D1"
## [176] "RSPH9" "SDAD1" "SHFL" "SNU13" "SNX15"
## [181] "SRBD1" "SRFBP1" "SURF6" "TBL3" "TRAF7"
## [186] "TSR1" "TSR2" "TSR3" "UBA52" "UNG"
## [191] "URB1" "UTP20" "UTP23" "UTP3" "UTP4"
## [196] "UTP6" "WDR12" "WDR3" "WDR46" "WDR74"
## [201] "XPO1" "XRCC5" "ZNF622" "ZNHIT3" "ZNHIT6"
# Combine combo4 with the rp_GO list and de-duplicate on the gene symbol.
biggest_list <- rbind(combo4, rp_GO)
# distinct() is called through the dplyr namespace so this step does not
# depend on dplyr/magrittr having been attached earlier in the session.
biggest_list <- dplyr::distinct(biggest_list, gene, .keep_all = TRUE)
dim(biggest_list)
## [1] 560 1
# Sort by gene symbol. With a single column, `[rows, ]` drops the data frame
# to a plain vector, which is why the listing below prints as a vector.
biggest_list <- biggest_list[order(biggest_list$gene), ]
biggest_list
## [1] "A0A0U1RQV5" "AATF" "ABCE1"
## [4] "ABT1" "APEX1" "APOD"
## [7] "AURKAIP1" "BAG6" "BMS1"
## [10] "BMT2" "BOP1" "BRIX1"
## [13] "BUD23" "BYSL" "C12orf65"
## [16] "C1D" "C1QBP" "CHCHD1"
## [19] "CHD7" "CIRBP" "CKS1B"
## [22] "CKS2" "CNOT6" "CPEB2"
## [25] "CPSF6" "CSDE1" "CUL4A"
## [28] "CUL4B" "CYLD" "DAP3"
## [31] "DCAF13" "DDX10" "DDX17"
## [34] "DDX18" "DDX21" "DDX27"
## [37] "DDX28" "DDX31" "DDX3X"
## [40] "DDX47" "DDX49" "DDX51"
## [43] "DDX52" "DDX54" "DDX56"
## [46] "DENR" "DHX29" "DHX30"
## [49] "DHX33" "DHX37" "DHX9"
## [52] "DIEXF" "DIMT1" "DIS3"
## [55] "DKC1" "DRD2" "DRD3"
## [58] "DRD4" "DROSHA" "EBNA1BP2"
## [61] "EFL1" "EIF1" "EIF1B"
## [64] "EIF2A" "EIF2AK2" "EIF2AK4"
## [67] "EIF2D" "EIF2S1" "EIF3C"
## [70] "EIF3H" "EIF3K" "EIF4A3"
## [73] "EIF4B" "EIF4H" "EIF5A"
## [76] "EIF5A2" "EIF5AL1" "EIF6"
## [79] "EMG1" "ERAL1" "ERCC2"
## [82] "ERI1" "ERI2" "ERI3"
## [85] "ESF1" "ETF1" "EXOSC1"
## [88] "EXOSC10" "EXOSC2" "EXOSC3"
## [91] "EXOSC4" "EXOSC5" "EXOSC6"
## [94] "EXOSC7" "EXOSC8" "EXOSC9"
## [97] "FAM207A" "FASTKD2" "FBL"
## [100] "FBLL1" "FCF1" "FDXACB1"
## [103] "FMR1" "FRG1" "FTSJ1"
## [106] "FTSJ3" "FXR2" "GADD45GIP1"
## [109] "GAR1" "GEMIN4" "GEMIN5"
## [112] "GFM2" "gins_human" "GLUL"
## [115] "GNL2" "GNL3L" "GRK1"
## [118] "GRK2" "GRK3" "GRK4"
## [121] "GRK5" "GRK6" "GRK7"
## [124] "GTF2H5" "GTF3A" "GTPBP10"
## [127] "GTPBP4" "HBA1" "HEATR1"
## [130] "HEATR3" "HELB" "HELQ"
## [133] "HSPA5" "IGHMBP2" "IMP3"
## [136] "IMP4" "IPO11" "IPO5"
## [139] "ISG15" "ISG20" "ISG20L2"
## [142] "KAT2B" "kcne2-kcnh2_human" "KPNB1"
## [145] "KRI1" "KRR1" "LARP1"
## [148] "LARP4" "LAS1L" "LETM1"
## [151] "LOC102724159" "LSG1" "LSM6"
## [154] "LTN1" "LTO1" "LTV1"
## [157] "LYAR" "MAIP1" "MAK16"
## [160] "MALSU1" "MCRS1" "MCTS1"
## [163] "MDN1" "METTL15" "METTL15P1"
## [166] "METTL16" "METTL17" "METTL5"
## [169] "MPHOSPH10" "MPHOSPH6" "MPV17L2"
## [172] "MRM1" "MRM2" "MRM3"
## [175] "MRPL1" "MRPL10" "MRPL11"
## [178] "MRPL12" "MRPL13" "MRPL14"
## [181] "MRPL15" "MRPL16" "MRPL17"
## [184] "MRPL18" "MRPL19" "MRPL2"
## [187] "MRPL20" "MRPL21" "MRPL22"
## [190] "MRPL23" "MRPL24" "MRPL27"
## [193] "MRPL28" "MRPL3" "MRPL30"
## [196] "MRPL32" "MRPL33" "MRPL34"
## [199] "MRPL35" "MRPL36" "MRPL37"
## [202] "MRPL38" "MRPL39" "MRPL4"
## [205] "MRPL40" "MRPL41" "MRPL42"
## [208] "MRPL43" "MRPL44" "MRPL45"
## [211] "MRPL46" "MRPL47" "MRPL48"
## [214] "MRPL49" "MRPL50" "MRPL51"
## [217] "MRPL52" "MRPL53" "MRPL54"
## [220] "MRPL55" "MRPL57" "MRPL58"
## [223] "MRPL9" "MRPS10" "MRPS11"
## [226] "MRPS12" "MRPS14" "MRPS15"
## [229] "MRPS16" "MRPS17" "MRPS18A"
## [232] "MRPS18B" "MRPS18C" "MRPS2"
## [235] "MRPS21" "MRPS22" "MRPS23"
## [238] "MRPS24" "MRPS25" "MRPS26"
## [241] "MRPS27" "MRPS28" "MRPS30"
## [244] "MRPS31" "MRPS33" "MRPS34"
## [247] "MRPS35" "MRPS36" "MRPS5"
## [250] "MRPS6" "MRPS7" "MRPS9"
## [253] "MRRF" "MRTO4" "MTERF3"
## [256] "MTERF4" "MTG2" "MTIF2"
## [259] "MTIF3" "MTOR" "MTREX"
## [262] "MYBBP1A" "NAA10" "NAF1"
## [265] "NAT10" "NCK1" "NDUFA7"
## [268] "NDUFAB1" "NEMF" "NGDN"
## [271] "NHP2" "NIFK" "NIP7"
## [274] "NLE1" "NMD3" "NME1"
## [277] "NOB1" "NOC2L" "NOC4L"
## [280] "NOL10" "NOL11" "NOL6"
## [283] "NOL8" "NOL9" "NOLC1"
## [286] "NOM1" "NOP10" "NOP14"
## [289] "NOP16" "NOP2" "NOP53"
## [292] "NOP56" "NOP58" "NOP9"
## [295] "NPM1" "NPM3" "NSA2"
## [298] "NSUN3" "NSUN4" "NSUN5"
## [301] "NSUN5P1" "NSUN5P2" "NUDT16"
## [304] "NUFIP1" "NUP88" "NVL"
## [307] "OLA1" "OXA1L" "PA2G4"
## [310] "PAK1IP1" "PDCD11" "PELO"
## [313] "PELP1" "PES1" "PHF2"
## [316] "PHF8" "PIH1D1" "PIH1D2"
## [319] "PIM1" "PIN4" "PINX1"
## [322] "PNPT1" "POLR1A" "POLR1B"
## [325] "POP4" "POP5" "POP7"
## [328] "PPAN" "PPARGC1A" "PRKDC"
## [331] "PTCD3" "PTEN" "PWP1"
## [334] "PWP2" "PYM1" "RACK1"
## [337] "RAN" "RBFA" "RBIS"
## [340] "RBM3" "RCL1" "REXO4"
## [343] "RICTOR" "RIMKLA" "RIMKLB"
## [346] "RIOK1" "RIOK2" "RIOK3"
## [349] "RIOX1" "RIOX2" "RNASEL"
## [352] "RPF1" "RPF2" "RPL10"
## [355] "RPL10A" "RPL10L" "RPL11"
## [358] "RPL12" "RPL13" "RPL13A"
## [361] "RPL13AP3" "RPL14" "RPL15"
## [364] "RPL17" "RPL18" "RPL18A"
## [367] "RPL19" "RPL21" "RPL22"
## [370] "RPL22L1" "RPL23" "RPL23A"
## [373] "RPL24" "RPL26" "RPL26L1"
## [376] "RPL27" "RPL27A" "RPL28"
## [379] "RPL29" "RPL3" "RPL30"
## [382] "RPL31" "RPL32" "RPL34"
## [385] "RPL35" "RPL35A" "RPL36"
## [388] "RPL36A" "RPL36AL" "RPL37"
## [391] "RPL37A" "RPL37AP8" "RPL38"
## [394] "RPL39" "RPL39L" "RPL39P5"
## [397] "RPL3L" "RPL4" "RPL41"
## [400] "RPL5" "RPL6" "RPL7"
## [403] "RPL7A" "RPL7L1" "RPL8"
## [406] "RPL9" "RPLP0" "RPLP0P6"
## [409] "RPLP1" "RPLP2" "RPP25"
## [412] "RPP30" "RPP38" "RPP40"
## [415] "RPS10" "RPS10P5" "RPS11"
## [418] "RPS12" "RPS13" "RPS14"
## [421] "RPS15" "RPS15A" "RPS16"
## [424] "RPS17" "RPS18" "RPS19"
## [427] "RPS2" "RPS20" "RPS21"
## [430] "RPS23" "RPS24" "RPS25"
## [433] "RPS26" "RPS26P11" "RPS27"
## [436] "RPS27A" "RPS27AP5" "RPS27L"
## [439] "RPS28" "RPS29" "RPS3"
## [442] "RPS3A" "RPS4X" "RPS4Y1"
## [445] "RPS4Y2" "RPS5" "RPS6"
## [448] "RPS6KA1" "RPS6KA2" "RPS6KA3"
## [451] "RPS6KA4" "RPS6KA5" "RPS6KA6"
## [454] "RPS6KB1" "RPS6KB2" "RPS6KC1"
## [457] "RPS6KL1" "RPS7" "RPS8"
## [460] "RPS9" "RPSA" "RPUSD1"
## [463] "RPUSD2" "RPUSD3" "RPUSD4"
## [466] "RRNAD1" "RRP1" "RRP12"
## [469] "RRP15" "RRP1B" "RRP36"
## [472] "RRP7A" "RRP7BP" "RRP8"
## [475] "RRP9" "RRS1" "RSKR"
## [478] "RSL1D1" "RSL24D1" "RSPH9"
## [481] "SART1" "SBDS" "SDAD1"
## [484] "SEC61A1" "SENP3" "SERP1"
## [487] "SF1" "SHFL" "SHQ1"
## [490] "SNU13" "SNX15" "SRBD1"
## [493] "SRFBP1" "SRP19" "SURF6"
## [496] "SUV39H1" "TACO1" "TBL3"
## [499] "TENT4B" "TERT" "TEX10"
## [502] "TFB1M" "TFB2M" "THUMPD1"
## [505] "TRAF7" "TRMT112" "TRMT61B"
## [508] "TSC1" "TSR1" "TSR2"
## [511] "TSR3" "UBA52" "UNG"
## [514] "URB1" "URB2" "URS0000051A30_9606"
## [517] "URS0000153780_9606" "URS0000229D6D_9606" "URS00003B7E88_9606"
## [520] "URS0000547024_9606" "URS00005B510E_9606" "URS0000697636_9606"
## [523] "URS00006D8FF2_9606" "URS000075C8FA_9606" "URS0000D5B596_9606"
## [526] "URS0001A04CEF_9606" "USP36" "UTP11"
## [529] "UTP14A" "UTP14C" "UTP15"
## [532] "UTP18" "UTP20" "UTP23"
## [535] "UTP3" "UTP4" "UTP6"
## [538] "VCX" "WBP11" "WDR12"
## [541] "WDR18" "WDR3" "WDR36"
## [544] "WDR43" "WDR46" "WDR55"
## [547] "WDR74" "WDR75" "WRN"
## [550] "XPO1" "XRCC5" "XRN2"
## [553] "YBEY" "ZC3H12A" "ZCCHC4"
## [556] "ZNF354A" "ZNF622" "ZNF658"
## [559] "ZNHIT3" "ZNHIT6"
#write.csv(biggest_list, "biggest_list.csv")