genecards data grab

This script works in combination with a function I made in a separate post that returns the fold change of gene expression samples. Here, find25genes(protein) takes one character argument, a search term such as ‘androgen’, and saves the top 25 genes that genecards.org lists for it. A second function, getSummaries(gene, protein), takes two character arguments (the gene and the protein) and saves the Entrez, GeneCards, and UniProtKB summaries for that gene. Each of these scrapers writes its data and its header as separate files; after running a scraper, call the matching reader, getProteinGenes(protein) or getGeneSummaries(protein), with the same protein name as a character argument to combine the header and the gene data. The reader returns the table to the screen together with the name of the combined file it wrote, but it does not keep the table in your workspace, so read that file in if you need it. Or go into the directory and add the header to the table yourself. It was done this way so that the script can later be altered to combine the gene summaries for all the genes into one table that each gene is appended to with its summaries.

Note that knitr doesn’t read in the webpage, although the code works fine when run interactively in RStudio. This may be a bug in the rvest package or in knitr: other web pages, like Indeed.com, work with knitr and rvest, and I am not sure why this website doesn’t work with the two together.

#Loading the rvest package
# install.packages("devtools")
#devtools::install_github("hadley/rvest",force=TRUE)

library(rvest)

## Loading required package: xml2

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:base':
## 
##     date

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Folder (relative to the working directory) that holds every scraped csv;
# the scraping and reading functions below all build their paths from it.
Gene_Path <- './gene scrapes'

This next chunk of code will erase your data if you already have it stored.

# Start from an empty scrape folder: remove whatever an earlier run left
# behind (this deletes previously scraped data), then create the folder.
if (dir.exists(Gene_Path)) {
  unlink(Gene_Path, recursive = TRUE)
}
dir.create(Gene_Path)

This function will return a table and header as separate csv files for the top 25 genes that genecards.org lists for a protein searched. Such as ‘estrogen’ or ‘androgen’ or even ‘protein.’

# Scrape the genecards.org keyword search for `protein` and save the gene
# symbols it lists.
#
# Args:
#   protein: a single search term (coerced to character and lower-cased).
#            Spaces are sent as '%20' in the query and become '-' in the
#            file names.
#
# Side effects (files are written inside Gene_Path; the working directory is
# never changed, so a failed download or write cannot strand the session in
# another folder):
#   <protein>.csv                     rows of symbol, search term, timestamp,
#                                     appended without a header on every call
#   tableProteinHeader_<protein>.csv  the column names, rewritten on each call
#
# Returns: the scraped table, invisibly.
find25genes <- function(protein){

  protein <- tolower(as.character(protein))
  stopifnot(length(protein) == 1L)
  protein <- gsub(' ', '%20', protein)

  # Build the query directly rather than substituting into a template, so the
  # search term is never interpreted as a regex replacement string.
  url <- paste0('https://www.genecards.org/Search/Keyword?queryString=',
                protein)

  webpage <- read_html(url, encoding = "UTF-8")

  symbols <- html_text(html_nodes(webpage, '.symbol-col a'))
  symbols <- gsub('\n', '', as.character(symbols))

  protein2 <- gsub('%20', '-', protein)

  # base::date() is called explicitly because lubridate masks date().
  tableProtein <- data.frame(
    proteinType = symbols,
    proteinSearched = rep(protein2, length(symbols)),
    todaysDate = rep(base::date(), length(symbols)),
    stringsAsFactors = FALSE
  )

  if (!dir.exists(Gene_Path)) {
    dir.create(Gene_Path, recursive = TRUE)
  }

  write.table(tableProtein,
              file.path(Gene_Path, paste0(protein2, ".csv")), append=TRUE,
              col.names=FALSE, sep=",", quote=TRUE, qmethod="double",
              row.names=FALSE)
  write.csv(colnames(tableProtein),
            file.path(Gene_Path,
                      paste0('tableProteinHeader_', protein2, '.csv')),
            row.names=FALSE)

  invisible(tableProtein)
}

In knitr there is an error that prevents using rvest to scrape this website; the server refuses the request with HTTP error 403 (Forbidden): - Quitting from lines 92-93 (geneCardsDataGrab2.Rmd) Error in open.connection(x, “rb”) : HTTP error 403. Calls: … eval -> find25genes -> read_html -> read_html.default

Execution halted

find25genes('estrogen')

Function to combine the data with the header and return the table for the 25 genes collected on the protein searched in genecards.org with the find25genes().

# Combine the gene rows and the header that find25genes() saved for `protein`.
#
# Args:
#   protein: the search term given to find25genes(); it is lower-cased and
#            spaces become '-' to match the saved file names.
#
# Side effect: writes the labelled table to 'Top25<protein>s.csv' in the
# current working directory.
#
# Returns: list(table, fileName) - the labelled data frame and the name of
# the csv just written.
getProteinGenes <- function(protein){
  protein <- gsub(' ', '-', tolower(as.character(protein)))

  dataFile <- file.path(Gene_Path, paste0(protein, '.csv'))
  headerFile <- file.path(Gene_Path,
                          paste0('tableProteinHeader_', protein, '.csv'))

  # Fail with an actionable message instead of a bare connection error.
  neededFiles <- c(dataFile, headerFile)
  missingFiles <- neededFiles[!file.exists(neededFiles)]
  if (length(missingFiles) > 0) {
    stop("No scraped genes for '", protein, "'; run find25genes() first. ",
         "Missing: ", paste(missingFiles, collapse = ", "), call. = FALSE)
  }

  genes <- read.csv(dataFile, sep=',', header=FALSE,
                    na.strings=c('',' ','NA'), stringsAsFactors = FALSE)
  header <- read.csv(headerFile, sep=',', header=TRUE,
                     na.strings=c('',' ','NA'), stringsAsFactors = FALSE)

  # write.csv() stored the column names as a single column called 'x'.
  colnames(genes) <- header$x

  fileName <- paste0('Top25', protein, 's.csv')
  write.csv(genes, fileName, row.names=FALSE)
  return(list(genes, fileName))
}

getProteinGenes('estrogen')

This next function will get the gene summaries for one of the genes you get from the previous function and enter in as the first argument, and the second argument is the protein entered to get the 25 genes with the find25genes().

# Scrape the Entrez, GeneCards and UniProtKB summaries for one gene from its
# genecards.org card and append them to that protein's summary file.
#
# Args:
#   gene:    a single gene symbol (sent lower-cased in the query, stored
#            upper-cased).
#   protein: the single search term the gene was found under; spaces become
#            ',' in the query's keywords and '-' in the file names.
#
# Side effects (files are written inside Gene_Path; the working directory is
# never changed, so a failed download or write cannot strand the session in
# another folder):
#   <protein>summary.csv              one row per call, appended, no header
#   geneHeader_summary_<protein>.csv  the column names, rewritten on each call
#
# Returns: the one-row summary table, invisibly.
getSummaries <- function(gene,protein){
  protein <- tolower(as.character(protein))
  gene <- tolower(as.character(gene))
  stopifnot(length(gene) == 1L, length(protein) == 1L)
  protein <- gsub(' ', ',', protein)

  # Build the query directly rather than by chained substitutions into a
  # template, so neither value can be rewritten by the other's pattern.
  url <- paste0('https://www.genecards.org/cgi-bin/carddisp.pl?gene=', gene,
                '&keywords=', protein)

  webpage <- read_html(url, encoding = "UTF-8")

  # Text of the first node matching `css` with newlines removed, or
  # 'no summary' when nothing matches. Only the first match is kept so the
  # result is always exactly one table row.
  firstSummary <- function(css) {
    found <- html_text(html_nodes(webpage, css))
    if (length(found) == 0) {
      return('no summary')
    }
    gsub('\n', '', as.character(found[1]))
  }

  protein2 <- gsub(',', '-', protein)

  # base::date() is called explicitly because lubridate masks date().
  tableProtein <- data.frame(
    proteinSearched = protein2,
    gene = toupper(gene),
    EntrezSummary = firstSummary('.gc-section-header+ .gc-subsection p'),
    GeneCardsSummary = firstSummary('.gc-subsection-header+ p'),
    UniProtKB_Summary = firstSummary('#summaries li:nth-child(1) div'),
    todaysDate = base::date(),
    stringsAsFactors = FALSE
  )

  if (!dir.exists(Gene_Path)) {
    dir.create(Gene_Path, recursive = TRUE)
  }

  write.table(tableProtein,
              file.path(Gene_Path, paste0(protein2, "summary.csv")),
              append=TRUE,
              col.names=FALSE, sep=",", quote=TRUE, qmethod="double",
              row.names=FALSE)
  write.csv(colnames(tableProtein),
            file.path(Gene_Path,
                      paste0('geneHeader_summary_', protein2, '.csv')),
            row.names=FALSE)

  invisible(tableProtein)
}

getSummaries('TP53','estrogen')

This function reads in the gene summaries, entrez,genecards, and uniprotKB for the protein data collected from the getSummaries().

# Combine the summary rows and the header that getSummaries() saved for
# `protein`.
#
# Args:
#   protein: the search term given to getSummaries(); it is lower-cased and
#            spaces become '-' to match the saved file names.
#
# Side effect: writes the labelled table to
# 'proteinGeneSummaries_<protein>.csv' in the current working directory.
#
# Returns: list(table, fileName) - the labelled data frame and the name of
# the csv just written.
getGeneSummaries <- function(protein){
  protein <- gsub(' ', '-', tolower(as.character(protein)))

  dataFile <- file.path(Gene_Path, paste0(protein, 'summary.csv'))
  headerFile <- file.path(Gene_Path,
                          paste0('geneHeader_summary_', protein, '.csv'))

  # Fail with an actionable message instead of a bare connection error.
  neededFiles <- c(dataFile, headerFile)
  missingFiles <- neededFiles[!file.exists(neededFiles)]
  if (length(missingFiles) > 0) {
    stop("No scraped summaries for '", protein,
         "'; run getSummaries() first. ",
         "Missing: ", paste(missingFiles, collapse = ", "), call. = FALSE)
  }

  summaries <- read.csv(dataFile, sep=',', header=FALSE,
                        na.strings=c('',' ','NA'), stringsAsFactors = FALSE)
  header <- read.csv(headerFile, sep=',', header=TRUE,
                     na.strings=c('',' ','NA'), stringsAsFactors = FALSE)

  # write.csv() stored the column names as a single column called 'x'.
  colnames(summaries) <- header$x

  fileName <- paste0('proteinGeneSummaries_', protein, '.csv')
  write.csv(summaries, fileName, row.names=FALSE)
  return(list(summaries, fileName))
}

getGeneSummaries('estrogen')

Lets take a look at that script published earlier in the month:

files located at: https://github.com/JanJanJan2018/Uterine-Fibroid-Beadchip-Genotypes-Analysis

These files are from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE593 and https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GPL96. Note that there are only 5 samples of each class, uterine leiomyoma (UL) or non-UL. All the gene-related information was obtained from genecards.org.

Gene expression in a tissue sample can indicate that the cells are creating more of the proteins the body needs to maintain its living functions, or that expression is being enhanced, reduced, or modified by external factors such as environment, chemicals, radiation, or health disturbances like a viral infection.

“Several steps in the gene expression process may be modulated, including the transcription, RNA splicing, translation, and post-translational modification of a protein. Gene regulation gives the cell control over structure and function, and is the basis for cellular differentiation, morphogenesis and the versatility and adaptability of any organism.” {Gene expression - Wikipedia, en.wikipedia.org/wiki/Gene_expression}

This study is a quick look at genes expressed on microarray chips that have one or more array cells (probes) for the same gene. Each gene can therefore be measured more than once in each sample, depending on how many times it appears in that microarray design. For more information on how these genes were collected and measured, follow the link above to the National Center for Biotechnology Information (NCBI) page for the GSE593 study in the Gene Expression Omnibus (GEO).

Lets build our tables by reading them in for the ULs and nonULs.

# Load the UL and non-UL expression tables. Both are comma-separated, lines
# starting with '#' are skipped as comments, blanks and 'NA' are read as
# missing, and text columns are read as factors.
ul <- read.delim(
  'UL_GSE593_GPL96.csv',
  header = TRUE, sep = ',', comment.char = '#',
  stringsAsFactors = TRUE, na.strings = c('', ' ', 'NA')
)

non_ul <- read.delim(
  'nonUL_GSE593_GPL96.csv',
  header = TRUE, sep = ',', comment.char = '#',
  stringsAsFactors = TRUE, na.strings = c('', ' ', 'NA')
)

head(ul)

##   X        ID GB_ACC Representative.Public.ID
## 1 1 1007_s_at U48705                   U48705
## 2 2   1053_at M87338                   M87338
## 3 3    117_at X51757                   X51757
## 4 4    121_at X69699                   X69699
## 5 5 1255_g_at L36861                   L36861
## 6 6   1294_at L13852                   L13852
##                                                      Gene.Title
## 1 discoidin domain receptor tyrosine kinase 1 /// microRNA 4640
## 2                   replication factor C (activator 1) 2, 40kDa
## 3                          heat shock 70kDa protein 6 (HSP70B')
## 4                                                  paired box 8
## 5                       guanylate cyclase activator 1A (retina)
## 6 microRNA 5193 /// ubiquitin-like modifier activating enzyme 7
##        Gene.Symbol     ENTREZ_GENE_ID GSM9093 GSM9094 GSM9095 GSM9096 GSM9097
## 1 DDR1 /// MIR4640  780 /// 100616237   212.9   289.7   242.4   230.7   235.5
## 2             RFC2               5982    52.6    15.0    57.9    20.4    43.6
## 3            HSPA6               3310    30.4    32.3    43.7    35.7    66.2
## 4             PAX8               7849   226.7   376.9   285.5   313.1   325.3
## 5           GUCA1A               2978    23.6    30.4     2.6    29.7    29.1
## 6 MIR5193 /// UBA7 7318 /// 100847079    90.3   157.1   139.6   144.1   140.9

colnames(ul)

##  [1] "X"                        "ID"                      
##  [3] "GB_ACC"                   "Representative.Public.ID"
##  [5] "Gene.Title"               "Gene.Symbol"             
##  [7] "ENTREZ_GENE_ID"           "GSM9093"                 
##  [9] "GSM9094"                  "GSM9095"                 
## [11] "GSM9096"                  "GSM9097"

Lets select only the samples and the gene symbol columns.

# Keep column 6 (Gene.Symbol) and columns 8-12 (the five GSM sample columns
# of `ul`). The same positions are taken from `non_ul`, which is assumed to
# share this column layout - TODO confirm against colnames(non_ul).
UL <- ul[,c(6,8:12)]
nonUL <- non_ul[,c(6,8:12)]

Next, we will split the gene symbol column that has multiple entries into the first three entries as lists to add to our tables and pull from

# A Gene.Symbol entry can hold several symbols separated by ' /// '. Split
# each entry and store its first three symbols as the character columns
# `first`, `second` and `third`; a position the entry does not have indexes
# out of range and so yields NA.
ulList <- as.character(UL$Gene.Symbol)
list <- strsplit(ulList, split = ' /// ')
first <- lapply(list, function(symbols) symbols[1])
second <- lapply(list, function(symbols) symbols[2])
third <- lapply(list, function(symbols) symbols[3])

UL[['first']] <- as.character(first)
UL[['second']] <- as.character(second)
UL[['third']] <- as.character(third)

# Same split for the non-UL table.
nonulList <- as.character(nonUL$Gene.Symbol)
list2 <- strsplit(nonulList, split = ' /// ')
first <- lapply(list2, function(symbols) symbols[1])
second <- lapply(list2, function(symbols) symbols[2])
third <- lapply(list2, function(symbols) symbols[3])

nonUL[['first']] <- as.character(first)
nonUL[['second']] <- as.character(second)
nonUL[['third']] <- as.character(third)

Next, we will build the function to grab the gene by its gene symbol and return the fold change of the UL to nonUL ratio from the means and medians of our total genes across all five samples. This function is modified to write the information to a table for the UL and nonUL information. Make sure the files aren’t in this folder or write to a separate folder.

# Start from an empty output folder: remove whatever an earlier run left
# behind (this deletes previously written tables), then create the folder.
if (dir.exists('./UL and nonUL foldchange tables')) {
  unlink('./UL and nonUL foldchange tables', recursive = TRUE)
}
dir.create('./UL and nonUL foldchange tables')

# Fold change (UL / non-UL) of one gene's expression, per matching row.
#
# Reads the global tables UL and nonUL. A row matches when its full
# Gene.Symbol, or any of its `first`/`second`/`third` split symbols, equals
# `gene` exactly. Columns 2:6 of each table are taken as the five samples.
# The two tables are assumed to list the same probes in the same order,
# because fold changes are computed row against row.
#
# Args:
#   gene: gene symbol to look up (exact, case-sensitive match).
#
# Side effects (inside './UL and nonUL foldchange tables'; the working
# directory is never changed):
#   allGenesUL.csv / allGenesNonUL.csv    every matching row, appended with
#                                         row names and without a header
#   header_UL_names.csv / header_nonUL_names.csv   the column names
#   Also prints the mean and median fold changes.
#
# Returns: list(UL table, nonUL table), each holding the samples plus mean and
# median; the UL table also holds FoldChange_mean and FoldChange_median.
# Column names carry a ' _UL' / ' _nonUL' suffix (with the leading space).
getMeanMedian <- function(gene){
  gene <- as.character(paste(gene))
  outDir <- './UL and nonUL foldchange tables'

  # %in% never yields NA, so rows whose split symbols are missing simply do
  # not match. Each table is filtered by its own symbols.
  hasGene <- function(tbl) {
    tbl$Gene.Symbol %in% gene | tbl$first %in% gene |
      tbl$second %in% gene | tbl$third %in% gene
  }

  sub_ul <- UL[hasGene(UL), ]
  sub_nonul <- nonUL[hasGene(nonUL), ]

  if (nrow(sub_ul) != nrow(sub_nonul)) {
    stop("UL and nonUL have different numbers of rows for gene '", gene,
         "', so fold changes cannot be paired row by row.", call. = FALSE)
  }
  if (nrow(sub_ul) == 0) {
    warning("No rows found for gene '", gene, "'.", call. = FALSE)
  }

  # Both statistics are taken from the sample values only; computing the
  # median after `mean` has been added as a column would fold the mean into
  # the median.
  withStats <- function(samples) {
    values <- as.matrix(samples)
    samples$mean <- apply(values, 1, mean)
    samples$median <- apply(values, 1, median)
    samples
  }

  gene1_UL <- withStats(sub_ul[, 2:6])
  gene1_nonUL <- withStats(sub_nonul[, 2:6])

  gene1_UL$FoldChange_mean <- gene1_UL$mean/gene1_nonUL$mean
  gene1_UL$FoldChange_median <- gene1_UL$median/gene1_nonUL$median

  geneMeans <- gene1_UL$FoldChange_mean
  geneMedians <- gene1_UL$FoldChange_median

  print('The foldchage of UL means to nonUL means is:')
  print(geneMeans)

  print('The foldchage of UL medians to nonUL medians is:')
  print(geneMedians)

  # paste() keeps its default separator, so the suffix is ' _UL' with a
  # space; this is left as is because the returned tables and the header
  # files already use these names.
  colnames(gene1_UL) <- paste(colnames(gene1_UL), '_UL')
  colnames(gene1_nonUL) <- paste(colnames(gene1_nonUL), '_nonUL')

  if (!dir.exists(outDir)) {
    dir.create(outDir, recursive = TRUE)
  }

  # Write every matching row (the first row is no longer dropped).
  write.table(gene1_UL, file.path(outDir, "allGenesUL.csv"), append=TRUE,
              col.names=FALSE, sep=",",
              row.names=TRUE)
  write.csv(colnames(gene1_UL), file.path(outDir, 'header_UL_names.csv'),
            row.names=FALSE)

  write.table(gene1_nonUL, file.path(outDir, "allGenesNonUL.csv"),
              append=TRUE,
              col.names=FALSE, sep=",",
              row.names=TRUE)
  write.csv(colnames(gene1_nonUL),
            file.path(outDir, 'header_nonUL_names.csv'), row.names=FALSE)

  return(list(gene1_UL,gene1_nonUL))
}

Lets look at the iron gene expression of transferrin in UL compared to nonUL

getMeanMedian("TF")

## [1] "The foldchage of UL means to nonUL means is:"
## [1] 3.075665 1.394843 1.205266 8.888889
## [1] "The foldchage of UL medians to nonUL medians is:"
## [1]  2.813590  1.451360  1.300586 11.600000

## [[1]]
##       GSM9093 _UL GSM9094 _UL GSM9095 _UL GSM9096 _UL GSM9097 _UL mean _UL
## 2927          5.5        33.7        31.2        26.9        53.1    30.08
## 13442        14.1        35.8        40.8        27.3        55.1    34.62
## 13443         8.6        50.3        47.4        64.8        53.2    44.86
## 19473         8.7         0.9         0.7         6.9         6.8     4.80
##       median _UL FoldChange_mean _UL FoldChange_median _UL
## 2927       30.64            3.075665              2.813590
## 13442      35.21            1.394843              1.451360
## 13443      48.85            1.205266              1.300586
## 19473       5.80            8.888889             11.600000
## 
## [[2]]
##       GSM9098 _nonUL GSM9099 _nonUL GSM9100 _nonUL GSM9101 _nonUL
## 2927            18.8            0.9           13.0            4.2
## 13442           37.1           23.7           12.7           43.1
## 13443           34.9           37.9           10.9           61.9
## 19473            0.4            1.0            0.5            0.5
##       GSM9102 _nonUL mean _nonUL median _nonUL
## 2927            12.0        9.78         10.89
## 13442            7.5       24.82         24.26
## 13443           40.5       37.22         37.56
## 19473            0.3        0.54          0.50

Now that we have a function to get the top genes for a protein, in this case ‘tumor,’ we can look up other genes related to ‘tumor’ such as ‘TF’ and get the summaries without having to visit the site genecards.org and enter in each gene. Let's try it now.

Lets look at the top 25 genes for ‘tumor’ and then get the summaries.

find25genes('tumor')

Print out the top 25 genes for the protein searched.

getProteinGenes('tumor')

Grab the gene summaries for a particular gene and the protein interested in.

getSummaries('TP53','tumor')

Print the results for one of the genes and the protein searched.

getGeneSummaries('tumor')

Now let's look at how this gene does in UL tissue compared to non-UL tissue as far as fold change goes, with the ratio of gene expression in UL/non-UL samples as a median and a mean.

getMeanMedian('TP53')

## [1] "The foldchage of UL means to nonUL means is:"
## [1] 1.418934 1.054537
## [1] "The foldchage of UL medians to nonUL medians is:"
## [1] 1.348975 1.113545

## [[1]]
##       GSM9093 _UL GSM9094 _UL GSM9095 _UL GSM9096 _UL GSM9097 _UL mean _UL
## 1274        161.6       182.3       113.8       151.1       142.1   150.18
## 10723        67.0        59.5        43.6        35.8        18.4    44.86
##       median _UL FoldChange_mean _UL FoldChange_median _UL
## 1274      150.64            1.418934              1.348975
## 10723      44.23            1.054537              1.113545
## 
## [[2]]
##       GSM9098 _nonUL GSM9099 _nonUL GSM9100 _nonUL GSM9101 _nonUL
## 1274           120.3           70.0          103.8          117.5
## 10723           63.2           27.3           53.7           36.9
##       GSM9102 _nonUL mean _nonUL median _nonUL
## 1274           117.6      105.84        111.67
## 10723           31.6       42.54         39.72

It looks like the gene TP53 is expressed more in the uterine leiomyoma, which is a tumor (mean fold changes of about 1.42 and 1.05 for its two probes). This makes sense.

Lets use a different protein as double word search in genecards.org, ‘hair loss’ to be exact.

find25genes('hair loss')

getProteinGenes('hair loss')

getSummaries('GJB2','hair loss')

getGeneSummaries('hair loss')

To extract the genes:

- find25genes(protein) will grab the 25 genes associated with the protein from the web
- getProteinGenes(protein) will print the genes associated with the protein
- getSummaries(gene, protein) will grab the gene's summaries for that protein from the web
- getGeneSummaries(protein) will print the gene summaries saved for that protein

The getMeanMedian(gene) will get the mean and median values for all instances of the gene in two samples being compared for pathogenesis/disease/control/treatment/etc

Lets modify this getMeanMedian(gene) function to take any data table with the gene symbol and the samples for that class and the other data table not of that class to get the mean and median fold change values for diseased/non-diseased values for a specific data table of varying samples involving blood tissue gene expression values. This data was prepared in an earlier project.

# Load the prepared blood gene-expression table. Blanks and 'NA' are read as
# missing and text stays character. TRUE/FALSE are spelled out because T and
# F are ordinary variables that can be reassigned.
dracula <- read.csv('Dracula.csv', sep=',', header=TRUE,
                    na.strings=c('',' ','NA'),
                    stringsAsFactors = FALSE)
dim(dracula)

## [1] 12845   179

colnames(dracula)

##   [1] "X"                                      
##   [2] "AD.Healthy.Male"                        
##   [3] "AD.Healthy.Male.1"                      
##   [4] "AD.Healthy.Male.2"                      
##   [5] "AD.Healthy.Male.3"                      
##   [6] "AD.Healthy.Male.4"                      
##   [7] "AD.Healthy.Male.5"                      
##   [8] "AD.Healthy.Male.6"                      
##   [9] "AD.Healthy.Male.7"                      
##  [10] "AD.Healthy.Male.8"                      
##  [11] "AD.Healthy.Male.9"                      
##  [12] "AD.Healthy.Male.10"                     
##  [13] "AD.Healthy.Male.11"                     
##  [14] "AD.Healthy.Male.12"                     
##  [15] "AD.Healthy.Male.13"                     
##  [16] "AD.Healthy.Male.14"                     
##  [17] "AD.Healthy.Male.15"                     
##  [18] "AD.Healthy.Female"                      
##  [19] "AD.Healthy.Female.1"                    
##  [20] "AD.Healthy.Female.2"                    
##  [21] "AD.Healthy.Female.3"                    
##  [22] "AD.Healthy.Female.4"                    
##  [23] "AD.Healthy.Female.5"                    
##  [24] "AD.Healthy.Female.6"                    
##  [25] "AD.Healthy.Female.7"                    
##  [26] "AD.Healthy.Female.8"                    
##  [27] "AD.Healthy.Female.9"                    
##  [28] "AD.Healthy.Female.10"                   
##  [29] "AD.Healthy.Female.11"                   
##  [30] "AD.Healthy.Female.12"                   
##  [31] "AD.Healthy.Female.13"                   
##  [32] "AD.Healthy.Female.14"                   
##  [33] "AD.Healthy.Female.15"                   
##  [34] "AD.Male"                                
##  [35] "AD.Male.1"                              
##  [36] "AD.Male.2"                              
##  [37] "AD.Male.3"                              
##  [38] "AD.Male.4"                              
##  [39] "AD.Male.5"                              
##  [40] "AD.Male.6"                              
##  [41] "AD.Male.7"                              
##  [42] "AD.Male.8"                              
##  [43] "AD.Male.9"                              
##  [44] "AD.Male.10"                             
##  [45] "AD.Male.11"                             
##  [46] "AD.Male.12"                             
##  [47] "AD.Male.13"                             
##  [48] "AD.Male.14"                             
##  [49] "AD.Male.15"                             
##  [50] "AD.Male.16"                             
##  [51] "AD.Male.17"                             
##  [52] "AD.Male.18"                             
##  [53] "AD.Male.19"                             
##  [54] "AD.Male.20"                             
##  [55] "AD.Male.21"                             
##  [56] "AD.Female"                              
##  [57] "AD.Female.1"                            
##  [58] "AD.Female.2"                            
##  [59] "AD.Female.3"                            
##  [60] "AD.Female.4"                            
##  [61] "AD.Female.5"                            
##  [62] "AD.Female.6"                            
##  [63] "AD.Female.7"                            
##  [64] "AD.Female.8"                            
##  [65] "AD.Female.9"                            
##  [66] "AD.Female.10"                           
##  [67] "AD.Female.11"                           
##  [68] "AD.Female.12"                           
##  [69] "AD.Female.13"                           
##  [70] "AD.Female.14"                           
##  [71] "AD.Female.15"                           
##  [72] "AD.Female.16"                           
##  [73] "AD.Female.17"                           
##  [74] "AD.Female.18"                           
##  [75] "AD.Female.19"                           
##  [76] "AD.Female.20"                           
##  [77] "AD.Female.21"                           
##  [78] "AD.Female.22"                           
##  [79] "AD.Female.23"                           
##  [80] "Hemo.Healthy.Female"                    
##  [81] "Hemo.Healthy.Female.1"                  
##  [82] "Hemo.Healthy.Male"                      
##  [83] "Hemo.Healthy.Male.1"                    
##  [84] "Hemo.Healthy.Male.2"                    
##  [85] "Hemo.Healthy.Male.3"                    
##  [86] "Hemo.Male"                              
##  [87] "Hemo.Male.1"                            
##  [88] "Hemo.Male.2"                            
##  [89] "Hemo.Male.3"                            
##  [90] "Hemo.Male.4"                            
##  [91] "Hemo.Male.5"                            
##  [92] "Hemo.Male.6"                            
##  [93] "Hemo.Male.7"                            
##  [94] "Hemo.Male.8"                            
##  [95] "Hemo.Male.9"                            
##  [96] "Hemo.Male.10"                           
##  [97] "Hemo.Male.11"                           
##  [98] "Hemo.Male.12"                           
##  [99] "Hemo.Female"                            
## [100] "Hemo.Female.1"                          
## [101] "Hemo.Female.2"                          
## [102] "Hemo.Female.3"                          
## [103] "Hemo.Female.4"                          
## [104] "Overweight.Female"                      
## [105] "Overweight.Female.1"                    
## [106] "Overweight.Female.2"                    
## [107] "Overweight.Female.3"                    
## [108] "Overweight.Female.4"                    
## [109] "Overweight.Female.5"                    
## [110] "Overweight.Female.6"                    
## [111] "Overweight.Female.7"                    
## [112] "Overweight.Female.8"                    
## [113] "Overweight.Female.9"                    
## [114] "Overweight.Female.10"                   
## [115] "Overweight.Female.11"                   
## [116] "Overweight.Female.12"                   
## [117] "Overweight.Female.13"                   
## [118] "Overweight.Female.EGCG.Treated"         
## [119] "Overweight.Female.EGCG.Treated.1"       
## [120] "Overweight.Female.EGCG.Treated.2"       
## [121] "Overweight.Female.EGCG.Treated.3"       
## [122] "Overweight.Female.EGCG.Treated.4"       
## [123] "Overweight.Female.EGCG.Treated.5"       
## [124] "Overweight.Female.EGCG.Treated.6"       
## [125] "Overweight.Female.EGCG.fishOil.VitC"    
## [126] "Overweight.Female.EGCG.fishOil.VitC.1"  
## [127] "Overweight.Female.EGCG.fishOil.VitC.2"  
## [128] "Overweight.Female.EGCG.fishOil.VitC.3"  
## [129] "Overweight.Female.EGCG.fishOil.VitC.4"  
## [130] "Overweight.Female.EGCG.fishOil.VitC.5"  
## [131] "Overweight.Female.EGCG.fishOil.VitC.6"  
## [132] "Flu.non.immunized.healthy"              
## [133] "Flu.non.immunized.healthy.1"            
## [134] "Flu.non.immunized.healthy.2"            
## [135] "Flu.non.immunized.healthy.3"            
## [136] "Flu.non.immunized.healthy.4"            
## [137] "Flu.non.immunized.healthy.5"            
## [138] "Flu.Immunized.Plus.Antibiotics.Day.1"   
## [139] "Flu.Immunized.Plus.Antibiotics.Day.1.1" 
## [140] "Flu.Immunized.Plus.Antibiotics.Day.1.2" 
## [141] "Flu.Immunized.Plus.Antibiotics.Day.3"   
## [142] "Flu.Immunized.Plus.Antibiotics.Day.3.1" 
## [143] "Flu.Immunized.Plus.Antibiotics.Day.3.2" 
## [144] "Flu.Immunized.Plus.Antibiotics.Day.7"   
## [145] "Flu.Immunized.Plus.Antibiotics.Day.7.1" 
## [146] "Flu.Immunized.Plus.Antibiotics.Day.7.2" 
## [147] "Flu.Immunized.Plus.Antibiotics.Day.21"  
## [148] "Flu.Immunized.Plus.Antibiotics.Day.21.1"
## [149] "Flu.Immunized.Plus.Antibiotics.Day.21.2"
## [150] "Flu.Immunized.Only.Day.1"               
## [151] "Flu.Immunized.Only.Day.1.1"             
## [152] "Flu.Immunized.Only.Day.1.2"             
## [153] "Flu.Immunized.Only.Day.3"               
## [154] "Flu.Immunized.Only.Day.3.1"             
## [155] "Flu.Immunized.Only.Day.3.2"             
## [156] "Flu.Immunized.Only.Day.7"               
## [157] "Flu.Immunized.Only.Day.7.1"             
## [158] "Flu.Immunized.Only.Day.7.2"             
## [159] "Flu.Immunized.Only.Day.21"              
## [160] "Flu.Immunized.Only.Day.21.1"            
## [161] "Flu.Immunized.Only.Day.21.2"            
## [162] "MI.Healthy.Males"                       
## [163] "MI.Healthy.Males.1"                     
## [164] "MI.Healthy.Males.2"                     
## [165] "MI.Healthy.Males.3"                     
## [166] "MI.Healthy.Males.4"                     
## [167] "MI.Healthy.Males.5"                     
## [168] "MI.Males"                               
## [169] "MI.Males.1"                             
## [170] "MI.Males.2"                             
## [171] "MI.Males.3"                             
## [172] "MI.Males.4"                             
## [173] "MI.Males.5"                             
## [174] "Tetanis.Immunized"                      
## [175] "Tetanis.Immunized.1"                    
## [176] "Tetanis.Immunized.2"                    
## [177] "Tetanis.Immunized.3"                    
## [178] "Tetanis.Immunized.4"                    
## [179] "Tetanis.Immunized.5"

Lets rename the X column to ‘Gene.Symbol’ in our new data table.

colnames(dracula)[1] <- 'Gene.Symbol'
head(colnames(dracula))

## [1] "Gene.Symbol"       "AD.Healthy.Male"   "AD.Healthy.Male.1"
## [4] "AD.Healthy.Male.2" "AD.Healthy.Male.3" "AD.Healthy.Male.4"

Lets also make a data table of the non-immunized flu healthy control samples and those immunized flu samples that are from 1 day and 7 days after immunization to compare.

# Column 1 is Gene.Symbol; 150:152 are the three Flu.Immunized.Only.Day.1
# samples and 156:158 the three Flu.Immunized.Only.Day.7 samples.
FluImz_1day <- dracula[,c(1,150:152)]
FluImz_7day <- dracula[,c(1,156:158)]
head(FluImz_1day)

##   Gene.Symbol Flu.Immunized.Only.Day.1 Flu.Immunized.Only.Day.1.1
## 1        A1BG                 4.231631                   3.736454
## 2        A1CF                 3.146671                   2.914839
## 3         A2M                 2.516229                   2.290962
## 4       A2ML1                 2.427249                   2.418693
## 5        AAAS                 4.466718                   4.337557
## 6        AACS                 5.002461                   5.451671
##   Flu.Immunized.Only.Day.1.2
## 1                   4.228881
## 2                   3.175774
## 3                   2.352346
## 4                   2.376332
## 5                   4.213542
## 6                   5.548737

head(FluImz_7day)

##   Gene.Symbol Flu.Immunized.Only.Day.7 Flu.Immunized.Only.Day.7.1
## 1        A1BG                 4.280365                   4.211709
## 2        A1CF                 2.922931                   3.316505
## 3         A2M                 2.413767                   2.552877
## 4       A2ML1                 2.232214                   2.503466
## 5        AAAS                 4.218476                   4.207981
## 6        AACS                 5.332674                   4.998220
##   Flu.Immunized.Only.Day.7.2
## 1                   4.219890
## 2                   3.004159
## 3                   2.267620
## 4                   2.246292
## 5                   4.270130
## 6                   5.604253

# Column 1 is Gene.Symbol; 132:137 are the six Flu.non.immunized.healthy
# samples, used as the control group for the immunized subsets.
healthy_noFluImz <- dracula[,c(1,132:137)]
head(healthy_noFluImz)

##   Gene.Symbol Flu.non.immunized.healthy Flu.non.immunized.healthy.1
## 1        A1BG                  4.022284                    4.256802
## 2        A1CF                  3.189060                    3.092595
## 3         A2M                  3.270110                    3.117491
## 4       A2ML1                  2.888058                    2.748343
## 5        AAAS                  4.319929                    4.680657
## 6        AACS                  6.487653                    6.178077
##   Flu.non.immunized.healthy.2 Flu.non.immunized.healthy.3
## 1                    4.373453                    4.021917
## 2                    3.588691                    3.134153
## 3                    3.231922                    2.364401
## 4                    2.763565                    2.403953
## 5                    4.559417                    4.303755
## 6                    6.176500                    4.922308
##   Flu.non.immunized.healthy.4 Flu.non.immunized.healthy.5
## 1                    4.398104                    4.035720
## 2                    2.959022                    3.149342
## 3                    2.302118                    2.310950
## 4                    2.319035                    2.668853
## 5                    4.157258                    3.471550
## 6                    5.548462                    5.412336

# Compare one gene's expression between the day-1 flu-immunized samples and the
# non-immunized healthy samples.
#
# Reads the global tables FluImz_1day (Gene.Symbol followed by 3 sample
# columns) and healthy_noFluImz (Gene.Symbol followed by 6 sample columns).
#
# Args:
#   gene: gene symbol; matched exactly (not as a regular expression) against
#         the Gene.Symbol column of both tables.
#
# Side effects:
#   * prints the mean-based and median-based fold changes;
#   * appends the gene's rows to "allFluImz1day.csv" and
#     "allhealthyNonImzFlu.csv" (no header row);
#   * rewrites "header_allFluImz1day.csv" and "header_allhealthyNonImzFlu.csv"
#     with the column names of those rows.
#
# Returns:
#   list(immunized, healthy): the sample columns of each table plus row-wise
#   mean and median; the immunized table also carries FoldChange_mean and
#   FoldChange_median (immunized / healthy).
#
# Stops with an error when the gene is missing from either table, instead of
# appending empty/NA rows to the CSV files.
getMeanMedianFlu1 <- function(gene){
  gene <- as.character(paste(gene))

  # Exact comparison: a symbol containing regex metacharacters still matches,
  # and which() drops rows whose symbol is NA.
  ul_rows <- which(as.character(FluImz_1day$Gene.Symbol) == gene)
  nonul_rows <- which(as.character(healthy_noFluImz$Gene.Symbol) == gene)

  if (length(ul_rows) == 0 || length(nonul_rows) == 0) {
    stop("Gene '", gene,
         "' was not found in both FluImz_1day and healthy_noFluImz.",
         call. = FALSE)
  }

  gene1_UL <- FluImz_1day[ul_rows, 2:4, drop = FALSE]
  gene1_nonUL <- healthy_noFluImz[nonul_rows, 2:7, drop = FALSE]

  # Snapshot the sample values first so that the median is computed from the
  # samples only and not from the samples plus the freshly added mean column.
  ul_values <- as.matrix(gene1_UL)
  nonul_values <- as.matrix(gene1_nonUL)

  gene1_UL$mean <- apply(ul_values, 1, mean)
  gene1_UL$median <- apply(ul_values, 1, median)
  gene1_nonUL$mean <- apply(nonul_values, 1, mean)
  gene1_nonUL$median <- apply(nonul_values, 1, median)

  gene1_UL$FoldChange_mean <- gene1_UL$mean/gene1_nonUL$mean
  gene1_UL$FoldChange_median <- gene1_UL$median/gene1_nonUL$median

  geneMeans <- gene1_UL$FoldChange_mean
  geneMedians <- gene1_UL$FoldChange_median

  print('The foldchage of flu immunized after 1 day means to heathy non-immunized flu means is:')
  print(geneMeans)

  print('The foldchage of flu immunized after 1 day medians to non-immunized flu medians is:')
  print(geneMedians)

  # paste() inserts a space before the suffix; kept so the returned column
  # names and the header files stay the same as before.
  colnames(gene1_UL) <- paste(colnames(gene1_UL), '_flu_1day')
  colnames(gene1_nonUL) <- paste(colnames(gene1_nonUL), '_healthyNonImmz')

  # Append every matched row. (Indexing with 2:nrow on a one-row table
  # evaluates to c(2, 1) and would write an all-NA row before the real one.)
  write.table(gene1_UL, "allFluImz1day.csv", append=TRUE,
              col.names=FALSE, sep=",",
              row.names=TRUE)
  UL_names <- colnames(gene1_UL)
  write.csv(UL_names,'header_allFluImz1day.csv',row.names=FALSE)

  write.table(gene1_nonUL, "allhealthyNonImzFlu.csv", append=TRUE,
              col.names=FALSE, sep=",",
              row.names=TRUE)
  nonUL_names <- colnames(gene1_nonUL)

  write.csv(nonUL_names,'header_allhealthyNonImzFlu.csv', row.names=FALSE)

  return(list(gene1_UL,gene1_nonUL))
}

# Fetch the top 25 genecards.org genes for the search term 'immunity'
# (find25genes() is defined earlier in this file).
find25genes('immunity')
# Combine the saved header and gene table for 'immunity'
# (getProteinGenes() is defined earlier; presumably it prints the table and
# the file name to read in -- confirm against its definition).
getProteinGenes('immunity')

I already saw that RAG1 wasn’t in our data table of 12845 genes, so we will use IL10, the next listed gene for ‘immunity.’

# Collect the gene summaries for IL10 under the 'immunity' search term
# (getSummaries() and getGeneSummaries() are defined earlier in this file;
# presumably they save to proteinGeneSummaries_immunity.csv -- confirm).
getSummaries('IL10','immunity')
getGeneSummaries('immunity')

# Fold change of IL10: day-1 immunized samples vs. non-immunized healthy samples.
getMeanMedianFlu1('IL10')

## [1] "The foldchage of flu immunized after 1 day means to heathy non-immunized flu means is:"
## [1] 0.9151371
## [1] "The foldchage of flu immunized after 1 day medians to non-immunized flu medians is:"
## [1] 0.9113448

## [[1]]
##      Flu.Immunized.Only.Day.1 _flu_1day Flu.Immunized.Only.Day.1.1 _flu_1day
## 5068                           2.698987                             2.873398
##      Flu.Immunized.Only.Day.1.2 _flu_1day mean _flu_1day median _flu_1day
## 5068                             2.592244       2.721543         2.710265
##      FoldChange_mean _flu_1day FoldChange_median _flu_1day
## 5068                 0.9151371                   0.9113448
## 
## [[2]]
##      Flu.non.immunized.healthy _healthyNonImmz
## 5068                                  3.045313
##      Flu.non.immunized.healthy.1 _healthyNonImmz
## 5068                                    3.073922
##      Flu.non.immunized.healthy.2 _healthyNonImmz
## 5068                                    3.916677
##      Flu.non.immunized.healthy.3 _healthyNonImmz
## 5068                                    2.552231
##      Flu.non.immunized.healthy.4 _healthyNonImmz
## 5068                                    2.596116
##      Flu.non.immunized.healthy.5 _healthyNonImmz mean _healthyNonImmz
## 5068                                    2.659251             2.973918
##      median _healthyNonImmz
## 5068               2.973918

From the above, we can see that 1 day after flu immunization there is an overall decline in IL10 gene expression compared to the healthy, non-immunized blood samples. Our gene summaries describe IL10 as follows:

# Load the gene summaries saved for the 'immunity' search term.
immSumm <- read.csv("proteinGeneSummaries_immunity.csv", sep = ",",
                    header = TRUE)
head(immSumm)

##   proteinSearched gene
## 1        immunity RAG1
## 2        immunity IL10
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 EntrezSummary
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                               The protein encoded by this gene is involved in activation of immunoglobulin V-D-J recombination. The encoded protein is involved in recognition of the DNA substrate, but stable binding and cleavage activity also requires RAG2. Defects in this gene can be the cause of several diseases. [provided by RefSeq, Jul 2008]
## 2 The protein encoded by this gene is a cytokine produced primarily by monocytes and to a lesser extent by lymphocytes. This cytokine has pleiotropic effects in immunoregulation and inflammation. It down-regulates the expression of Th1 cytokines, MHC class II Ags, and costimulatory molecules on macrophages. It also enhances B cell survival, proliferation, and antibody production. This cytokine can block NF-kappa B activity, and is involved in the regulation of the JAK-STAT signaling pathway. Knockout studies in mice suggested the function of this cytokine as an essential immunoregulator in the intestinal tract. Mutations in this gene are associated with an increased susceptibility to HIV-1 infection and rheumatoid arthritis. [provided by RefSeq, May 2020]
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       GeneCardsSummary
## 1                                             RAG1 (Recombination Activating 1) is a Protein Coding gene.                                            Diseases associated with RAG1 include Combined Cellular And Humoral Immune Defects With Granulomas and Alpha/Beta T-Cell Lymphopenia With Gamma/Delta T-Cell Expansion, Severe Cytomegalovirus Infection, And Autoimmunity.                                            Among its related pathways are Signaling by GPCR and Regulation of activated PAK-2p34 by proteasome mediated degradation.                                            Gene Ontology (GO) annotations related to this gene include protein homodimerization activity and ubiquitin-protein transferase activity.                                            
## 2                                                                                                                                                                                                           IL10 (Interleukin 10) is a Protein Coding gene.                                            Diseases associated with IL10 include Graft-Versus-Host Disease and Human Immunodeficiency Virus Type 1.                                            Among its related pathways are Tuberculosis and PEDF Induced Signaling.                                            Gene Ontology (GO) annotations related to this gene include cytokine activity and interleukin-10 receptor binding.                                            An important paralog of this gene is IL20.
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
                                                                                        UniProtKB_Summary
## 1 Catalytic component of the RAG complex, a multiprotein complex that mediates the DNA cleavage phase during V(D)J recombination. V(D)J recombination assembles a diverse repertoire of immunoglobulin and T-cell receptor genes in developing B and T-lymphocytes through rearrangement of different V (variable), in some cases D (diversity), and J (joining) gene segments. In the RAG complex, RAG1 mediates the DNA-binding to the conserved recombination signal sequences (RSS) and catalyzes the DNA cleavage activities by introducing a double-strand break between the RSS and the adjacent coding segment. RAG2 is not a catalytic component but is required for all known catalytic activities. DNA cleavage occurs in 2 steps: a first nick is introduced in the top strand immediately upstream of the heptamer, generating a 3'-hydroxyl group that can attack the phosphodiester bond on the opposite strand in a direct transesterification reaction, thereby creating 4 DNA ends: 2 hairpin coding ends and 2 blunt, 5'-phosphorylated ends. The chromatin structure plays an essential role in the V(D)J recombination reactions and the presence of histone H3 trimethylated at 'Lys-4' (H3K4me3) stimulates both the nicking and haipinning steps. The RAG complex also plays a role in pre-B cell allelic exclusion, a process leading to expression of a single immunoglobulin heavy chain allele to enforce clonality and monospecific recognition by the B-cell antigen receptor (BCR) expressed on individual B-lymphocytes. The introduction of DNA breaks by the RAG complex on one immunoglobulin allele induces ATM-dependent repositioning of the other allele to pericentromeric heterochromatin, preventing accessibility to the RAG complex and recombination of the second allele. In addition to its endonuclease activity, RAG1 also acts as an E3 ubiquitin-protein ligase that mediates monoubiquitination of histone H3. Histone H3 monoubiquitination is required for the joining step of V(D)J recombination. 
Mediates polyubiquitination of KPNA1 (By similarity).\n                         RAG1_HUMAN,P15918\n                         
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                Major immune regulatory cytokine that acts on many cells of the immune system where it has profound anti-inflammatory functions, limiting excessive tissue disruption caused by inflammation. Mechanistically, IL10 binds to its heterotetrameric receptor comprising IL10RA and IL10RB leading to JAK1 and STAT2-mediated phosphorylation of STAT3 (PubMed:16982608). In turn, STAT3 translocates to the nucleus where it drives expression of anti-inflammatory mediators (PubMed:18025162). Targets antigen-presenting cells (APCs) such as macrophages and monocytes and inhibits their release of pro-inflammatory cytokines including granulocyte-macrophage colony-stimulating factor /GM-CSF, granulocyte colony-stimulating factor/G-CSF, IL-1 alpha, IL-1 beta, IL-6, IL-8 and TNF-alpha (PubMed:1940799, PubMed:7512027, PubMed:11564774). Interferes also with antigen presentation by reducing the expression of MHC-class II and co-stimulatory molecules, thereby inhibiting their ability to induce T cell activation (PubMed:8144879). 
In addition, controls the inflammatory response of macrophages by reprogramming essential metabolic pathways including mTOR signaling (By similarity).\n                         IL10_HUMAN,P22301\n                         
##                 todaysDate
## 1 Thu Jul 30 07:32:02 2020
## 2 Thu Jul 30 07:35:15 2020

# Row 2 is IL10 and column 3 is EntrezSummary.
immSumm[2,3]

## [1] The protein encoded by this gene is a cytokine produced primarily by monocytes and to a lesser extent by lymphocytes. This cytokine has pleiotropic effects in immunoregulation and inflammation. It down-regulates the expression of Th1 cytokines, MHC class II Ags, and costimulatory molecules on macrophages. It also enhances B cell survival, proliferation, and antibody production. This cytokine can block NF-kappa B activity, and is involved in the regulation of the JAK-STAT signaling pathway. Knockout studies in mice suggested the function of this cytokine as an essential immunoregulator in the intestinal tract. Mutations in this gene are associated with an increased susceptibility to HIV-1 infection and rheumatoid arthritis. [provided by RefSeq, May 2020]
## 2 Levels: The protein encoded by this gene is a cytokine produced primarily by monocytes and to a lesser extent by lymphocytes. This cytokine has pleiotropic effects in immunoregulation and inflammation. It down-regulates the expression of Th1 cytokines, MHC class II Ags, and costimulatory molecules on macrophages. It also enhances B cell survival, proliferation, and antibody production. This cytokine can block NF-kappa B activity, and is involved in the regulation of the JAK-STAT signaling pathway. Knockout studies in mice suggested the function of this cytokine as an essential immunoregulator in the intestinal tract. Mutations in this gene are associated with an increased susceptibility to HIV-1 infection and rheumatoid arthritis. [provided by RefSeq, May 2020] ...

The summary for the immunity gene IL10 was appended to the file after the earlier entry for RAG1, the gene that had no entry in our data table.

Now let's create a function that does the same for the flu-immunized samples after 7 days.

# Compare one gene's expression between the day-7 flu-immunized samples and the
# non-immunized healthy samples.
#
# Reads the global tables FluImz_7day (Gene.Symbol followed by 3 sample
# columns) and healthy_noFluImz (Gene.Symbol followed by 6 sample columns).
#
# Args:
#   gene: gene symbol; matched exactly (not as a regular expression) against
#         the Gene.Symbol column of both tables.
#
# Side effects:
#   * prints the mean-based and median-based fold changes;
#   * appends the gene's rows to "allFluImz7day.csv" and
#     "allhealthyNonImzFlu.csv" (no header row);
#   * rewrites "header_allFluImz7day.csv" and "header_allhealthyNonImzFlu.csv"
#     with the column names of those rows.
#
# Returns:
#   list(immunized, healthy): the sample columns of each table plus row-wise
#   mean and median; the immunized table also carries FoldChange_mean and
#   FoldChange_median (immunized / healthy).
#
# Stops with an error when the gene is missing from either table, instead of
# appending empty/NA rows to the CSV files.
getMeanMedianFlu7 <- function(gene){
  gene <- as.character(paste(gene))

  # Exact comparison: a symbol containing regex metacharacters still matches,
  # and which() drops rows whose symbol is NA.
  ul_rows <- which(as.character(FluImz_7day$Gene.Symbol) == gene)
  nonul_rows <- which(as.character(healthy_noFluImz$Gene.Symbol) == gene)

  if (length(ul_rows) == 0 || length(nonul_rows) == 0) {
    stop("Gene '", gene,
         "' was not found in both FluImz_7day and healthy_noFluImz.",
         call. = FALSE)
  }

  gene1_UL <- FluImz_7day[ul_rows, 2:4, drop = FALSE]
  gene1_nonUL <- healthy_noFluImz[nonul_rows, 2:7, drop = FALSE]

  # Snapshot the sample values first so that the median is computed from the
  # samples only and not from the samples plus the freshly added mean column.
  ul_values <- as.matrix(gene1_UL)
  nonul_values <- as.matrix(gene1_nonUL)

  gene1_UL$mean <- apply(ul_values, 1, mean)
  gene1_UL$median <- apply(ul_values, 1, median)
  gene1_nonUL$mean <- apply(nonul_values, 1, mean)
  gene1_nonUL$median <- apply(nonul_values, 1, median)

  gene1_UL$FoldChange_mean <- gene1_UL$mean/gene1_nonUL$mean
  gene1_UL$FoldChange_median <- gene1_UL$median/gene1_nonUL$median

  geneMeans <- gene1_UL$FoldChange_mean
  geneMedians <- gene1_UL$FoldChange_median

  print('The foldchage of flu immunized after 7 days means to heathy non-immunized flu means is:')
  print(geneMeans)

  print('The foldchage of flu immunized after 7 days medians to non-immunized flu medians is:')
  print(geneMedians)

  # paste() inserts a space before the suffix; kept so the returned column
  # names and the header files stay the same as before.
  colnames(gene1_UL) <- paste(colnames(gene1_UL), '_flu_7day')
  colnames(gene1_nonUL) <- paste(colnames(gene1_nonUL), '_healthyNonImmz')

  # Append every matched row. (Indexing with 2:nrow on a one-row table
  # evaluates to c(2, 1) and would write an all-NA row before the real one.)
  write.table(gene1_UL, "allFluImz7day.csv", append=TRUE,
              col.names=FALSE, sep=",",
              row.names=TRUE)
  UL_names <- colnames(gene1_UL)
  write.csv(UL_names,'header_allFluImz7day.csv',row.names=FALSE)

  write.table(gene1_nonUL, "allhealthyNonImzFlu.csv", append=TRUE,
              col.names=FALSE, sep=",",
              row.names=TRUE)
  nonUL_names <- colnames(gene1_nonUL)

  write.csv(nonUL_names,'header_allhealthyNonImzFlu.csv', row.names=FALSE)

  return(list(gene1_UL,gene1_nonUL))
}

Let's see the gene expression values across the samples taken 7 days after flu immunization.

# Fold change of IL10: day-7 immunized samples vs. non-immunized healthy samples.
getMeanMedianFlu7('IL10')

## [1] "The foldchage of flu immunized after 7 days means to heathy non-immunized flu means is:"
## [1] 0.9590879
## [1] "The foldchage of flu immunized after 7 days medians to non-immunized flu medians is:"
## [1] 0.9599085

## [[1]]
##      Flu.Immunized.Only.Day.7 _flu_7day Flu.Immunized.Only.Day.7.1 _flu_7day
## 5068                           2.704722                             2.994895
##      Flu.Immunized.Only.Day.7.2 _flu_7day mean _flu_7day median _flu_7day
## 5068                              2.85713       2.852249         2.854689
##      FoldChange_mean _flu_7day FoldChange_median _flu_7day
## 5068                 0.9590879                   0.9599085
## 
## [[2]]
##      Flu.non.immunized.healthy _healthyNonImmz
## 5068                                  3.045313
##      Flu.non.immunized.healthy.1 _healthyNonImmz
## 5068                                    3.073922
##      Flu.non.immunized.healthy.2 _healthyNonImmz
## 5068                                    3.916677
##      Flu.non.immunized.healthy.3 _healthyNonImmz
## 5068                                    2.552231
##      Flu.non.immunized.healthy.4 _healthyNonImmz
## 5068                                    2.596116
##      Flu.non.immunized.healthy.5 _healthyNonImmz mean _healthyNonImmz
## 5068                                    2.659251             2.973918
##      median _healthyNonImmz
## 5068               2.973918

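After 7 days, IL10 expression is still below the non-immunized healthy baseline (mean fold change of about 0.96), but it is closer to that baseline than it was 1 day after immunization (about 0.92). Both comparisons use the same non-immunized healthy samples.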

Out of curiosity, which genes have the largest fold change from the control (healthy) state to the treated (immunized) state for the flu samples after 1 and 7 days? Let's find out.

# Collapse rows that share a gene symbol by averaging each day-7 sample column.
flu7 <- FluImz_7day %>%
  group_by(Gene.Symbol) %>%
  summarise_at(
    vars(Flu.Immunized.Only.Day.7:Flu.Immunized.Only.Day.7.2),
    mean
  )

# Plain data frame keyed by gene symbol, with a per-gene mean over the samples.
flu7DF <- as.data.frame(flu7)
names(flu7DF) <- c("Gene.Symbol", paste0("Flu7_sample", 1:3))
rownames(flu7DF) <- flu7DF$Gene.Symbol
flu7DF$Gene.Symbol <- NULL
flu7DF$Flu7_mean <- apply(flu7DF, 1, mean)
head(flu7DF)

##       Flu7_sample1 Flu7_sample2 Flu7_sample3 Flu7_mean
## A1BG      4.280365     4.211709     4.219890  4.237321
## A1CF      2.922931     3.316505     3.004159  3.081198
## A2M       2.413767     2.552877     2.267620  2.411421
## A2ML1     2.232214     2.503466     2.246292  2.327324
## AAAS      4.218476     4.207981     4.270130  4.232195
## AACS      5.332674     4.998220     5.604253  5.311716

# Collapse rows that share a gene symbol by averaging each day-1 sample column.
flu1 <- FluImz_1day %>%
  group_by(Gene.Symbol) %>%
  summarise_at(
    vars(Flu.Immunized.Only.Day.1:Flu.Immunized.Only.Day.1.2),
    mean
  )

# Plain data frame keyed by gene symbol, with a per-gene mean over the samples.
flu1DF <- as.data.frame(flu1)
names(flu1DF) <- c("Gene.Symbol", paste0("flu1_sample", 1:3))
rownames(flu1DF) <- flu1DF$Gene.Symbol
flu1DF$Gene.Symbol <- NULL
flu1DF$Flu1_mean <- apply(flu1DF, 1, mean)
head(flu1DF)

##       flu1_sample1 flu1_sample2 flu1_sample3 Flu1_mean
## A1BG      4.231631     3.736454     4.228881  4.065655
## A1CF      3.146671     2.914839     3.175774  3.079095
## A2M       2.516229     2.290962     2.352346  2.386512
## A2ML1     2.427249     2.418693     2.376332  2.407425
## AAAS      4.466718     4.337557     4.213542  4.339273
## AACS      5.002461     5.451671     5.548737  5.334290

# Collapse rows that share a gene symbol by averaging each healthy sample column.
healthy <- healthy_noFluImz %>%
  group_by(Gene.Symbol) %>%
  summarise_at(
    vars(Flu.non.immunized.healthy:Flu.non.immunized.healthy.5),
    mean
  )

# Plain data frame keyed by gene symbol, with a per-gene mean over the samples.
healthyDF <- as.data.frame(healthy)
names(healthyDF) <- c("Gene.Symbol", paste0("healthy_sample", 1:6))

rownames(healthyDF) <- healthyDF$Gene.Symbol
healthyDF$Gene.Symbol <- NULL
healthyDF$healthy_mean <- apply(healthyDF, 1, mean)
head(healthyDF)

##       healthy_sample1 healthy_sample2 healthy_sample3 healthy_sample4
## A1BG         4.022284        4.256802        4.373453        4.021917
## A1CF         3.189060        3.092595        3.588691        3.134153
## A2M          3.270110        3.117491        3.231922        2.364401
## A2ML1        2.888058        2.748343        2.763565        2.403953
## AAAS         4.319929        4.680657        4.559417        4.303755
## AACS         6.487653        6.178077        6.176500        4.922308
##       healthy_sample5 healthy_sample6 healthy_mean
## A1BG         4.398104        4.035720     4.184713
## A1CF         2.959022        3.149342     3.185477
## A2M          2.302118        2.310950     2.766165
## A2ML1        2.319035        2.668853     2.631968
## AAAS         4.157258        3.471550     4.248761
## AACS         5.548462        5.412336     5.787556

# Attach the immunized per-gene means to the healthy table. Rows are paired by
# gene symbol (the row names) with an exact match() rather than by position,
# so a difference in row order or row count between the tables cannot pair a
# mean with the wrong gene; a gene missing from an immunized table gets NA.
healthyDF$flu7_mean <-
  flu7DF$Flu7_mean[match(row.names(healthyDF), row.names(flu7DF))]
healthyDF$flu1_mean <-
  flu1DF$Flu1_mean[match(row.names(healthyDF), row.names(flu1DF))]

# Fold change = immunized mean / healthy mean.
healthyDF$flu7_FC <- healthyDF$flu7_mean/healthyDF$healthy_mean
healthyDF$flu1_FC <- healthyDF$flu1_mean/healthyDF$healthy_mean

# Largest fold change first (order() places NA fold changes last).
healthyDF_flu7 <- healthyDF[order(healthyDF$flu7_FC, decreasing=TRUE),]
healthyDF_flu1 <- healthyDF[order(healthyDF$flu1_FC, decreasing=TRUE),]

head(healthyDF_flu7,10)

##       healthy_sample1 healthy_sample2 healthy_sample3 healthy_sample4
## IL6          3.532791        3.506112        3.862225       10.847922
## IL12B        2.859991        2.985050        2.582881        6.932016
## PLA1A        2.783971        2.938718        2.927507        6.053301
## CCL20        3.358975        3.505948        4.622152       10.917330
## CXCL3        2.800063        3.147927        3.577131        7.197023
## IL1A         3.346607        3.086545        3.599909        7.363630
## IFNB1        2.745418        2.842194        3.033243        6.283625
## CXCL1        4.035740        3.614128        4.733442        8.560762
## MN1          3.752404        3.595350        4.361879        5.563545
## CCL2         2.943400        3.020114        2.996591        5.582259
##       healthy_sample5 healthy_sample6 healthy_mean flu7_mean flu1_mean  flu7_FC
## IL6          7.352172        6.917782     6.003167 11.282219  5.065581 1.879378
## IL12B        2.862626        2.727428     3.491665  6.114039  2.696345 1.751038
## PLA1A        4.466492        3.871127     3.840186  6.528760  3.453236 1.700115
## CCL20       10.395168        7.341819     6.690232 10.853911  6.348397 1.622352
## CXCL3        6.911485        2.724020     4.392942  7.120630  3.215502 1.620925
## IL1A         5.552124        3.910816     4.476605  6.964573  3.796884 1.555771
## IFNB1        2.660872        2.298769     3.310687  5.137388  2.334119 1.551759
## CXCL1        7.880634        3.082269     5.317829  7.918206  3.753831 1.488992
## MN1          3.261076        3.864003     4.066376  5.925166  3.707288 1.457112
## CCL2         3.973207        2.856823     3.562066  5.130118  2.964963 1.440209
##         flu1_FC
## IL6   0.8438181
## IL12B 0.7722233
## PLA1A 0.8992367
## CCL20 0.9489053
## CXCL3 0.7319702
## IL1A  0.8481615
## IFNB1 0.7050257
## CXCL1 0.7058953
## MN1   0.9116933
## CCL2  0.8323718

# Top 10 genes by day-1 fold change (healthyDF_flu1 is sorted by flu1_FC, descending).
head(healthyDF_flu1,10)

##          healthy_sample1 healthy_sample2 healthy_sample3 healthy_sample4
## TPPP3           4.158310        3.923552        4.068103        4.755469
## DSP             4.169059        3.379389        3.150569        4.697270
## SERPING1        2.854054        3.420882        3.278454        2.951903
## ANKRD22         3.971623        4.492860        3.943555        3.730357
## CNN3            3.486748        4.035040        3.670648        3.326756
## IFI27           3.582496        3.674980        3.235038        3.123812
## CXCL10          5.053084        5.597071        5.069216        7.291671
## TEX13B          2.987415        3.020468        3.119884        2.769849
## MAP1S           3.795172        3.803262        4.163974        3.625743
## P2RY2           4.833359        4.578032        4.979556        3.612481
##          healthy_sample5 healthy_sample6 healthy_mean flu7_mean flu1_mean
## TPPP3           4.128901        3.825179     4.143252  4.107384  5.051876
## DSP             2.784697        5.645555     3.971090  4.605834  4.833565
## SERPING1        3.052786        2.481578     3.006609  2.697820  3.620393
## ANKRD22         3.349001        3.289842     3.796206  3.598591  4.564063
## CNN3            3.047465        3.191846     3.459751  3.473371  4.112458
## IFI27           6.718688        7.590538     4.654259  3.491754  5.480094
## CXCL10          6.357247        4.094206     5.577082  7.967758  6.536448
## TEX13B          2.635829        2.894804     2.904708  2.933190  3.399027
## MAP1S           4.797805        3.824317     4.001712  3.895361  4.654756
## P2RY2           3.911698        3.956133     4.311876  4.002677  4.979750
##            flu7_FC  flu1_FC
## TPPP3    0.9913430 1.219302
## DSP      1.1598411 1.217189
## SERPING1 0.8972964 1.204145
## ANKRD22  0.9479439 1.202269
## CNN3     1.0039369 1.188657
## IFI27    0.7502278 1.177437
## CXCL10   1.4286606 1.172019
## TEX13B   1.0098053 1.170178
## MAP1S    0.9734238 1.163191
## P2RY2    0.9282913 1.154892

The 10 genes with the highest fold change (immunized vs. healthy) 7 days after flu immunization are:

# Gene symbols of the first 10 rows of the table sorted by flu7_FC.
row.names(healthyDF_flu7)[1:10]

##  [1] "IL6"   "IL12B" "PLA1A" "CCL20" "CXCL3" "IL1A"  "IFNB1" "CXCL1" "MN1"  
## [10] "CCL2"

The 10 genes with the highest fold change (immunized vs. healthy) 1 day after flu immunization are:

# Gene symbols of the first 10 rows of the table sorted by flu1_FC.
row.names(healthyDF_flu1)[1:10]

##  [1] "TPPP3"    "DSP"      "SERPING1" "ANKRD22"  "CNN3"     "IFI27"   
##  [7] "CXCL10"   "TEX13B"   "MAP1S"    "P2RY2"

What is the gene TPPP3 and also the gene DSP?

# Collect the gene summaries for TPPP3 and DSP under the label 'protein'
# (getSummaries() and getGeneSummaries() are defined earlier in this file;
# presumably they save to proteinGeneSummaries_protein.csv -- confirm).
getSummaries('TPPP3','protein')
getSummaries('DSP','protein')
getGeneSummaries('protein')

# Load the saved summaries and show the gene and EntrezSummary columns
# (columns 2 and 3) of the first two rows.
protein <- read.csv("proteinGeneSummaries_protein.csv", sep = ",",
                    header = TRUE)
protein[1:2, 2:3]

##    gene
## 1 TPPP3
## 2   DSP
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                EntrezSummary
## 1                                             TPPP3 (Tubulin Polymerization Promoting Protein Family Member 3) is a Protein Coding gene.                                            Diseases associated with TPPP3 include Cerebrospinal Fluid Leak and Creutzfeldt-Jakob Disease.                                                                                        Gene Ontology (GO) annotations related to this gene include tubulin binding.                                            An important paralog of this gene is TPPP.
## 2                                                                                                                                                             This gene encodes a protein that anchors intermediate filaments to desmosomal plaques and forms an obligate component of functional desmosomes. Mutations in this gene are the cause of several cardiomyopathies and keratodermas, including skin fragility-woolly hair syndrome. Alternative splicing results in multiple transcript variants. [provided by RefSeq, Jan 2016]

# Number of genes in the sorted fold-change tables.
l <- nrow(healthyDF_flu7)

# First 10 rows of each sorted table (largest day-7 and day-1 fold changes),
# stacked, with rows that appear in both lists kept once.
fluDF_20genesOver <- rbind(healthyDF_flu7[1:10, ], healthyDF_flu1[1:10, ])
fluDF_20genesOver2 <- unique(fluDF_20genesOver)

# Last 10 rows of each sorted table (smallest fold changes), same treatment.
fluDF_20genesUnder <- rbind(healthyDF_flu7[(l - 9):l, ],
                            healthyDF_flu1[(l - 9):l, ])
fluDF_20genesUnder2 <- unique(fluDF_20genesUnder)

# Combined table of the extreme genes in both directions.
fluGenes <- rbind(fluDF_20genesOver2, fluDF_20genesUnder2)
fluGenes

##          healthy_sample1 healthy_sample2 healthy_sample3 healthy_sample4
## IL6             3.532791        3.506112        3.862225       10.847922
## IL12B           2.859991        2.985050        2.582881        6.932016
## PLA1A           2.783971        2.938718        2.927507        6.053301
## CCL20           3.358975        3.505948        4.622152       10.917330
## CXCL3           2.800063        3.147927        3.577131        7.197023
## IL1A            3.346607        3.086545        3.599909        7.363630
## IFNB1           2.745418        2.842194        3.033243        6.283625
## CXCL1           4.035740        3.614128        4.733442        8.560762
## MN1             3.752404        3.595350        4.361879        5.563545
## CCL2            2.943400        3.020114        2.996591        5.582259
## TPPP3           4.158310        3.923552        4.068103        4.755469
## DSP             4.169059        3.379389        3.150569        4.697270
## SERPING1        2.854054        3.420882        3.278454        2.951903
## ANKRD22         3.971623        4.492860        3.943555        3.730357
## CNN3            3.486748        4.035040        3.670648        3.326756
## IFI27           3.582496        3.674980        3.235038        3.123812
## CXCL10          5.053084        5.597071        5.069216        7.291671
## TEX13B          2.987415        3.020468        3.119884        2.769849
## MAP1S           3.795172        3.803262        4.163974        3.625743
## P2RY2           4.833359        4.578032        4.979556        3.612481
## ALDH8A1         4.264865        4.398943        3.906306        2.698432
## NUAK1           3.730637        3.972498        6.136054        2.541197
## ZNF792          4.117826        4.382712        3.767494        2.760149
## ZFY             5.305066        3.056216        5.622549        2.253746
## IRX3            6.191337        5.745147        4.362552        3.306018
## EIF1AY          8.306616        2.716334        7.844066        2.251399
## PRKY            6.288330        3.626857        6.686264        2.305589
## DDX3Y           7.449197        3.000683        7.698444        2.375035
## USP9Y           7.526885        2.890965        7.288694        2.185536
## RPS4Y1         11.887884        3.515487       11.825050        2.845748
## UGT2B17         4.499824        3.318443        2.669704        2.241949
## UTY             6.074083        3.669161        6.427811        3.178805
## MME             4.085820        4.311034        4.619152        6.748776
## ANXA3           6.172563        7.347994        4.272719        6.606101
##          healthy_sample5 healthy_sample6 healthy_mean flu7_mean flu1_mean
## IL6             7.352172        6.917782     6.003167 11.282219  5.065581
## IL12B           2.862626        2.727428     3.491665  6.114039  2.696345
## PLA1A           4.466492        3.871127     3.840186  6.528760  3.453236
## CCL20          10.395168        7.341819     6.690232 10.853911  6.348397
## CXCL3           6.911485        2.724020     4.392942  7.120630  3.215502
## IL1A            5.552124        3.910816     4.476605  6.964573  3.796884
## IFNB1           2.660872        2.298769     3.310687  5.137388  2.334119
## CXCL1           7.880634        3.082269     5.317829  7.918206  3.753831
## MN1             3.261076        3.864003     4.066376  5.925166  3.707288
## CCL2            3.973207        2.856823     3.562066  5.130118  2.964963
## TPPP3           4.128901        3.825179     4.143252  4.107384  5.051876
## DSP             2.784697        5.645555     3.971090  4.605834  4.833565
## SERPING1        3.052786        2.481578     3.006609  2.697820  3.620393
## ANKRD22         3.349001        3.289842     3.796206  3.598591  4.564063
## CNN3            3.047465        3.191846     3.459751  3.473371  4.112458
## IFI27           6.718688        7.590538     4.654259  3.491754  5.480094
## CXCL10          6.357247        4.094206     5.577082  7.967758  6.536448
## TEX13B          2.635829        2.894804     2.904708  2.933190  3.399027
## MAP1S           4.797805        3.824317     4.001712  3.895361  4.654756
## P2RY2           3.911698        3.956133     4.311876  4.002677  4.979750
## ALDH8A1         3.676365        3.807695     3.792101  2.615699  2.922142
## NUAK1           2.573743        2.600760     3.592482  2.454809  2.550242
## ZNF792          2.722575        3.034552     3.464218  2.301207  2.829504
## ZFY             2.094082        2.260798     3.432076  2.265235  2.250336
## IRX3            2.882966        2.785895     4.212319  2.767155  3.263961
## EIF1AY          2.487563        2.402172     4.334692  2.620722  2.436440
## PRKY            2.464782        2.160181     3.922001  2.364170  2.675763
## DDX3Y           2.296873        2.497235     4.219578  2.418634  2.286559
## USP9Y           2.356722        2.392613     4.106903  2.296698  2.205807
## RPS4Y1          2.836240        3.090078     6.000081  3.031073  2.780916
## UGT2B17         2.504341        2.483071     2.952889  2.532987  2.081791
## UTY             2.927378        3.047398     4.220773  3.048336  2.972960
## MME             6.029147        3.176352     4.828380  5.083470  3.315351
## ANXA3           4.700487        3.050526     5.358398  4.148243  3.514748
##            flu7_FC   flu1_FC
## IL6      1.8793777 0.8438181
## IL12B    1.7510381 0.7722233
## PLA1A    1.7001155 0.8992367
## CCL20    1.6223520 0.9489053
## CXCL3    1.6209253 0.7319702
## IL1A     1.5557711 0.8481615
## IFNB1    1.5517590 0.7050257
## CXCL1    1.4889922 0.7058953
## MN1      1.4571122 0.9116933
## CCL2     1.4402086 0.8323718
## TPPP3    0.9913430 1.2193022
## DSP      1.1598411 1.2171885
## SERPING1 0.8972964 1.2041448
## ANKRD22  0.9479439 1.2022694
## CNN3     1.0039369 1.1886573
## IFI27    0.7502278 1.1774366
## CXCL10   1.4286606 1.1720192
## TEX13B   1.0098053 1.1701784
## MAP1S    0.9734238 1.1631912
## P2RY2    0.9282913 1.1548917
## ALDH8A1  0.6897757 0.7705866
## NUAK1    0.6833184 0.7098832
## ZNF792   0.6642788 0.8167800
## ZFY      0.6600189 0.6556778
## IRX3     0.6569195 0.7748607
## EIF1AY   0.6045925 0.5620791
## PRKY     0.6027968 0.6822444
## DDX3Y    0.5731934 0.5418928
## USP9Y    0.5592288 0.5370975
## RPS4Y1   0.5051719 0.4634797
## UGT2B17  0.8577997 0.7050014
## UTY      0.7222222 0.7043640
## MME      1.0528313 0.6866383
## ANXA3    0.7741572 0.6559325

# Save the combined gene table; the gene symbols are stored as the row names.
write.csv(fluGenes, file = "fluGenesUpDown.csv", row.names = TRUE)

# Collect the row names of fluGenes; the outer parentheses make the assignment
# print its value as well.
(names <- row.names(fluGenes))

# Call getSummaries() once per row name with the search term 'immunity'.
# NOTE(review): getSummaries() is defined elsewhere; presumably it writes the
# scraped summaries to files under './gene scrapes' - confirm.
for (gene in names) {
  getSummaries(gene, "immunity")
}

# Load the scraped summaries and attach their column names.
# The summary file is read without a header row; the column names come from
# column `x` of a separate header file.
# NOTE(review): both files are presumably produced by getSummaries() - confirm.
imm <- read.csv("./gene scrapes/immunitysummary.csv", sep = ",",
                header = FALSE)
immH <- read.csv("./gene scrapes/geneHeader_summary_immunity.csv", sep = ",",
                 header = TRUE)
# as.character() keeps the labels whether `x` was read as character or factor.
colnames(imm) <- as.character(immH$x)
imm

##    proteinSearched     gene
## 1         immunity      IL6
## 2         immunity    IL12B
## 3         immunity    PLA1A
## 4         immunity    CCL20
## 5         immunity    CXCL3
## 6         immunity     IL1A
## 7         immunity    IFNB1
## 8         immunity    CXCL1
## 9         immunity      MN1
## 10        immunity     CCL2
## 11        immunity    TPPP3
## 12        immunity      DSP
## 13        immunity SERPING1
## 14        immunity  ANKRD22
## 15        immunity     CNN3
## 16        immunity    IFI27
## 17        immunity   CXCL10
## 18        immunity   TEX13B
## 19        immunity    MAP1S
## 20        immunity    P2RY2
## 21        immunity  ALDH8A1
## 22        immunity    NUAK1
## 23        immunity   ZNF792
## 24        immunity      ZFY
## 25        immunity     IRX3
## 26        immunity   EIF1AY
## 27        immunity     PRKY
## 28        immunity    DDX3Y
## 29        immunity    USP9Y
## 30        immunity   RPS4Y1
## 31        immunity  UGT2B17
## 32        immunity      UTY
## 33        immunity      MME
## 34        immunity    ANXA3
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       EntrezSummary
## 1                                                                                                                                                                                                                    This gene encodes a cytokine that functions in inflammation and the maturation of B cells. In addition, the encoded protein has been shown to be an endogenous pyrogen capable of inducing fever in people with autoimmune diseases or infections. The protein is primarily produced at sites of acute and chronic inflammation, where it is secreted into the serum and induces a transcriptional inflammatory response through interleukin 6 receptor, alpha. The functioning of this gene is implicated in a wide variety of inflammation-associated disease states, including suspectibility to diabetes mellitus and systemic juvenile rheumatoid arthritis. Alternative splicing results in multiple transcript variants. [provided by RefSeq, Dec 2015]
## 2  This gene encodes a subunit of interleukin 12, a cytokine that acts on T and natural killer cells, and has a broad array of biological activities. Interleukin 12 is a disulfide-linked heterodimer composed of the 40 kD cytokine receptor like subunit encoded by this gene, and a 35 kD subunit encoded by IL12A. This cytokine is expressed by activated macrophages that serve as an essential inducer of Th1 cells development. This cytokine has been found to be important for sustaining a sufficient number of memory/effector Th1 cells to mediate long-term protection to an intracellular pathogen. Overexpression of this gene was observed in the central nervous system of patients with multiple sclerosis (MS), suggesting a role of this cytokine in the pathogenesis of the disease. The promoter polymorphism of this gene has been reported to be associated with the severity of atopic and non-atopic asthma in children. [provided by RefSeq, Jul 2008]
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              The protein encoded by this gene is a phospholipase that hydrolyzes fatty acids at the sn-1 position of phosphatidylserine and 1-acyl-2-lysophosphatidylserine. This secreted protein hydrolyzes phosphatidylserine in liposomes. Three transcript variants encoding different isoforms have been found for this gene.[provided by RefSeq, May 2011]
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                   This antimicrobial gene belongs to the subfamily of small cytokine CC genes. Cytokines are a family of secreted proteins involved in immunoregulatory and inflammatory processes. The CC cytokines are proteins characterized by two adjacent cysteines. The protein encoded by this gene displays chemotactic activity for lymphocytes and can repress proliferation of myeloid progenitors. Two transcript variants encoding different isoforms have been found for this gene. [provided by RefSeq, Sep 2014]
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              This antimicrobial gene encodes a member of the CXC subfamily of chemokines. The encoded protein is a secreted growth factor that signals through the G-protein coupled receptor, CXC receptor 2. This protein plays a role in inflammation and as a chemoattractant for neutrophils. [provided by RefSeq, Sep 2014]
## 6                                                                                                                                                                                                                                                                                                                         The protein encoded by this gene is a member of the interleukin 1 cytokine family. This cytokine is a pleiotropic cytokine involved in various immune responses, inflammatory processes, and hematopoiesis. This cytokine is produced by monocytes and macrophages as a proprotein, which is proteolytically processed and released in response to cell injury, and thus induces apoptosis. This gene and eight other interleukin 1 family genes form a cytokine gene cluster on chromosome 2. It has been suggested that the polymorphism of these genes is associated with rheumatoid arthritis and Alzheimer's disease. [provided by RefSeq, Jul 2008]
## 7                                                                                                         This gene encodes a cytokine that belongs to the interferon family of signaling proteins, which are released as part of the innate immune response to pathogens. The protein encoded by this gene belongs to the type I class of interferons, which are important for defense against viral infections. In addition, type I interferons are involved in cell differentiation and anti-tumor defenses. Following secretion in response to a pathogen, type I interferons bind a homologous receptor complex and induce transcription of genes such as those encoding inflammatory cytokines and chemokines. Overactivation of type I interferon secretion is linked to autoimmune diseases. Mice deficient for this gene display several phenotypes including defects in B cell maturation and increased susceptibility to viral infection. [provided by RefSeq, Sep 2015]
## 8                                                                                                                                                                                                                                                                                                                                 This antimicrobial gene encodes a member of the CXC subfamily of chemokines. The encoded protein is a secreted growth factor that signals through the G-protein coupled receptor, CXC receptor 2. This protein plays a role in inflammation and as a chemoattractant for neutrophils. Aberrant expression of this protein is associated with the growth and progression of certain tumors. A naturally occurring processed form of this protein has increased chemotactic activity. Alternate splicing results in coding and non-coding variants of this gene. A pseudogene of this gene is found on chromosome 4. [provided by RefSeq, Sep 2014]
## 9                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        Meningioma 1 (MN1) contains two sets of CAG repeats. It is disrupted by a balanced translocation (4;22) in a meningioma, and its inactivation may contribute to meningioma 32 pathogenesis. [provided by RefSeq, Jul 2008]
## 10                                                                                                                                                                                  This gene is one of several cytokine genes clustered on the q-arm of chromosome 17. Chemokines are a superfamily of secreted proteins involved in immunoregulatory and inflammatory processes. The superfamily is divided into four subfamilies based on the arrangement of N-terminal cysteine residues of the mature peptide. This chemokine is a member of the CC subfamily which is characterized by two adjacent cysteine residues. This cytokine displays chemotactic activity for monocytes and basophils but not for neutrophils or eosinophils. It has been implicated in the pathogenesis of diseases characterized by monocytic infiltrates, like psoriasis, rheumatoid arthritis and atherosclerosis. It binds to chemokine receptors CCR2 and CCR4. [provided by RefSeq, Jul 2013]
## 11                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   TPPP3 (Tubulin Polymerization Promoting Protein Family Member 3) is a Protein Coding gene.                                            Diseases associated with TPPP3 include Cerebrospinal Fluid Leak and Creutzfeldt-Jakob Disease.                                                                                        Gene Ontology (GO) annotations related to this gene include tubulin binding.                                            An important paralog of this gene is TPPP.
## 12                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   This gene encodes a protein that anchors intermediate filaments to desmosomal plaques and forms an obligate component of functional desmosomes. Mutations in this gene are the cause of several cardiomyopathies and keratodermas, including skin fragility-woolly hair syndrome. Alternative splicing results in multiple transcript variants. [provided by RefSeq, Jan 2016]
## 13                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  This gene encodes a highly glycosylated plasma protein involved in the regulation of the complement cascade. Its encoded protein, C1 inhibitor, inhibits activated C1r and C1s of the first complement component and thus regulates complement activation. It is synthesized in the liver, and its deficiency is associated with hereditary angioneurotic oedema (HANE). Alternative splicing results in multiple transcript variants encoding the same isoform. [provided by RefSeq, May 2020]
## 14                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         ANKRD22 (Ankyrin Repeat Domain 22) is a Protein Coding gene.                                                                                                                                                                                An important paralog of this gene is PLA2G6.
## 15                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     This gene encodes a protein with a markedly acidic C terminus; the basic N-terminus is highly homologous to the N-terminus of a related gene, CNN1. Members of the CNN gene family all contain similar tandemly repeated motifs. This encoded protein is associated with the cytoskeleton but is not involved in contraction. [provided by RefSeq, Jul 2008]
## 16                                                                                                                                                                                                                                                                                                                                                                    IFI27 (Interferon Alpha Inducible Protein 27) is a Protein Coding gene.                                            Diseases associated with IFI27 include Hepatitis C Virus and Oral Leukoplakia.                                            Among its related pathways are Interferon gamma signaling and Innate Immune System.                                            Gene Ontology (GO) annotations related to this gene include RNA polymerase II activating transcription factor binding and lamin binding.                                            An important paralog of this gene is IFI27L2.
## 17                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     This antimicrobial gene encodes a chemokine of the CXC subfamily and ligand for the receptor CXCR3. Binding of this protein to CXCR3 results in pleiotropic effects, including stimulation of monocytes, natural killer and T-cell migration, and modulation of adhesion molecule expression. [provided by RefSeq, Sep 2014]
## 18                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             This gene is similar to a mouse gene that is expressed in the testis. [provided by RefSeq, Jul 2008]
## 19                                                                                                                                                                                                                                                                                                                                                                                                                                   MAP1S (Microtubule Associated Protein 1S) is a Protein Coding gene.                                            Diseases associated with MAP1S include Giant Axonal Neuropathy 1, Autosomal Recessive and Leigh Syndrome, French Canadian Type.                                                                                        Gene Ontology (GO) annotations related to this gene include identical protein binding and actin filament binding.                                            An important paralog of this gene is MAP1B.
## 20                                                                                                                                                                                                                                               The product of this gene belongs to the family of P2 receptors, which is activated by extracellular nucleotides and subdivided into P2X ligand-gated ion channels and P2Y G-protein coupled receptors. This family has several receptor subtypes with different pharmacological selectivity, which overlaps in some cases, for various adenosine and uridine nucleotides. This receptor, found on many cell types, is activated by ATP and UTP and is reported to be overexpressed on some cancer cell types. It is involved in many cellular functions, such as proliferation, apoptosis and inflammation. Three transcript variants encoding the same protein have been identified for this gene. [provided by RefSeq, Mar 2013]
## 21                                                                                                                                                                                                                                                                                                                                                                This gene encodes a member of the aldehyde dehydrogenase family of proteins. The encoded protein has been implicated in the synthesis of 9-cis-retinoic acid and in the breakdown of the amino acid tryptophan. This enzyme converts 9-cis-retinal into the retinoid X receptor ligand 9-cis-retinoic acid, and has approximately 40-fold higher activity with 9-cis-retinal than with all-trans-retinal. In addition, this enzyme has been shown to catalyze the conversion of 2-aminomuconic semialdehyde to 2-aminomuconate in the kynurenine pathway of tryptophan catabolism. [provided by RefSeq, Jul 2018]
## 22                                                                                                                                                                                                                                                                                                                                                     NUAK1 (NUAK Family Kinase 1) is a Protein Coding gene.                                            Diseases associated with NUAK1 include Endometrial Squamous Cell Carcinoma and Omphalocele.                                            Among its related pathways are Regulation of TP53 Activity and Gene Expression.                                            Gene Ontology (GO) annotations related to this gene include transferase activity, transferring phosphorus-containing groups and protein tyrosine kinase activity.                                            An important paralog of this gene is NUAK2.
## 23                                                                                                                                                                                                                                                                                                                                                                                                                                                        ZNF792 (Zinc Finger Protein 792) is a Protein Coding gene.                                            Diseases associated with ZNF792 include Spherocytosis, Type 5.                                            Among its related pathways are Gene Expression and Herpes simplex virus 1 infection.                                            Gene Ontology (GO) annotations related to this gene include nucleic acid binding.                                            An important paralog of this gene is ZNF304.
## 24                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               This gene encodes a zinc finger-containing protein that may function as a transcription factor. This gene was once a candidate gene for the testis-determining factor (TDF) and was erroneously referred to as TDF. [provided by RefSeq, Jul 2008]
## 25                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         IRX3 is a member of the Iroquois homeobox gene family (see IRX1; MIM 606197) and plays a role in an early step of neural development (Bellefroid et al., 1998 [PubMed 9427753]). Members of this family appear to play multiple roles during pattern formation of vertebrate embryos (Lewis et al., 1999 [PubMed 10370142]).[supplied by OMIM, Aug 2009]
## 26                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     This gene is located on the non-recombining region of the Y chromosome. It encodes a protein related to eukaryotic translation initiation factor 1A (EIF1A), which may function in stabilizing the binding of the initiator Met-tRNA to 40S ribosomal subunits. Alternative splicing results in multiple transcript variants. [provided by RefSeq, Jul 2013]
## 27                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               This gene is similar to the protein kinase, X-linked gene in the pseudoautosomal region of the X chromosome. The gene is classified as a transcribed pseudogene because it has lost a coding exon that results in all transcripts being candidates for nonsense-mediated decay (NMD) and unlikely to express a protein. Abnormal recombination between this gene and a related gene on chromosome X is a frequent cause of XX males and XY females. [provided by RefSeq, Jul 2010]
## 28                                                                                                                                              The protein encoded by this gene is a member of the DEAD-box RNA helicase family, characterized by nine conserved motifs, included the conserved Asp-Glu-Ala-Asp (DEAD) motif. These motifs are thought to be involved in ATP binding, hydrolysis, RNA binding, and in the formation of intramolecular interactions. This protein shares high similarity to DDX3X, on the X chromosome, but a deletion of this gene is not complemented by DDX3X. Mutations in this gene result in male infertility, a reduction in germ cell numbers, and can result in Sertoli-cell only sydrome. Pseudogenes sharing similarity to both this gene and the DDX3X paralog are found on chromosome 4 and the X chromosome. Alternative splicing results in multiple transcript variants encoding different isoforms. [provided by RefSeq, Oct 2014]
## 29                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          This gene is a member of the peptidase C19 family. It encodes a protein that is similar to ubiquitin-specific proteases, which cleave the ubiquitin moiety from ubiquitin-fused precursors and ubiquitinylated proteins. [provided by RefSeq, Mar 2009]
## 30     Cytoplasmic ribosomes, organelles that catalyze protein synthesis, consist of a small 40S subunit and a large 60S subunit. Together these subunits are composed of 4 RNA species and approximately 80 structurally distinct proteins. This gene encodes ribosomal protein S4, a component of the 40S subunit. Ribosomal protein S4 is the only ribosomal protein known to be encoded by more than one gene, namely this gene and ribosomal protein S4, X-linked (RPS4X). The 2 isoforms encoded by these genes are not identical, but are functionally equivalent. Ribosomal protein S4 belongs to the S4E family of ribosomal proteins. It has been suggested that haploinsufficiency of the ribosomal protein S4 genes plays a role in Turner syndrome; however, this hypothesis is controversial. As is typical for genes encoding ribosomal proteins, there are multiple processed pseudogenes of this gene dispersed through the genome. [provided by RefSeq, Jul 2008]
## 31                                                                                                                                                                                                                                                                                                                                                                                                                                                                            This gene encodes a member of the uridine diphosphoglucuronosyltransferase protein family. The encoded enzyme catalyzes the transfer of glucuronic acid from uridine diphosphoglucuronic acid to a diverse array of substrates including steroid hormones and lipid-soluble drugs. This process, known as glucuronidation, is an intermediate step in the metabolism of steroids. Copy number variation in this gene is associated with susceptibility to osteoporosis.[provided by RefSeq, Apr 2010]
## 32                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  This gene encodes a protein containing tetratricopeptide repeats which are thought to be involved in protein-protein interactions. The encoded protein is also a minor histocompatibility antigen which may induce graft rejection of male stem cell grafts. A large number of alternatively spliced transcripts have been observed for this gene, but the full length nature of some of these variants has not been determined. [provided by RefSeq, Apr 2012]
## 33                                                                                                                                                                                                                                                              The protein encoded by this gene is a type II transmembrane glycoprotein and a common acute lymphocytic leukemia antigen that is an important cell surface marker in the diagnosis of human acute lymphocytic leukemia (ALL). The encoded protein is present on leukemic cells of pre-B phenotype, which represent 85% of cases of ALL. This protein is not restricted to leukemic cells, however, and is found on a variety of normal tissues. The protein is a neutral endopeptidase that cleaves peptides at the amino side of hydrophobic residues and inactivates several peptide hormones including glucagon, enkephalins, substance P, neurotensin, oxytocin, and bradykinin. [provided by RefSeq, Aug 2017]
## 34                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    This gene encodes a member of the annexin family.  Members of this calcium-dependent phospholipid-binding protein family play a role in the regulation of cellular growth and in signal transduction pathways.  This protein functions in the inhibition of phopholipase A2 and cleavage of inositol 1,2-cyclic phosphate to form inositol 1-phosphate. This protein may also play a role in anti-coagulation. [provided by RefSeq, Jul 2008]
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   GeneCardsSummary
## 1                                                                                                                                                                     IL6 (Interleukin 6) is a Protein Coding gene.                                            Diseases associated with IL6 include Kaposi Sarcoma and Rheumatoid Arthritis, Systemic Juvenile.                                            Among its related pathways are Transcriptional misregulation in cancer and Folate Metabolism.                                            Gene Ontology (GO) annotations related to this gene include signaling receptor binding and growth factor activity.                                            
## 2                                                                                                                                                                                 IL12B (Interleukin 12B) is a Protein Coding gene.                                            Diseases associated with IL12B include Immunodeficiency 29 and Takayasu Arteritis.                                            Among its related pathways are Tuberculosis and PEDF Induced Signaling.                                            Gene Ontology (GO) annotations related to this gene include protein homodimerization activity and protein heterodimerization activity.                                            
## 3                                                                                                                                            PLA1A (Phospholipase A1 Member A) is a Protein Coding gene.                                            Diseases associated with PLA1A include Muscular Dystrophy, Congenital, Lmna-Related.                                            Among its related pathways are Metabolism and Glycerophospholipid biosynthesis.                                            Gene Ontology (GO) annotations related to this gene include hydrolase activity and phospholipase A1 activity.                                            An important paralog of this gene is LIPH.
## 4                                                                                                                                              CCL20 (C-C Motif Chemokine Ligand 20) is a Protein Coding gene.                                            Diseases associated with CCL20 include Sialadenitis and Pustulosis Palmaris Et Plantaris.                                            Among its related pathways are Signaling by GPCR and PEDF Induced Signaling.                                            Gene Ontology (GO) annotations related to this gene include cytokine activity and chemokine activity.                                            An important paralog of this gene is CCL4.
## 5                                                                                                                                                                       CXCL3 (C-X-C Motif Chemokine Ligand 3) is a Protein Coding gene.                                            Diseases associated with CXCL3 include Melanoma.                                            Among its related pathways are Signaling by GPCR and PEDF Induced Signaling.                                            Gene Ontology (GO) annotations related to this gene include chemokine activity and CXCR chemokine receptor binding.                                            An important paralog of this gene is CXCL2.
## 6                                                                                                                                                             IL1A (Interleukin 1 Alpha) is a Protein Coding gene.                                            Diseases associated with IL1A include Arthritis and Cholesteatoma Of Middle Ear.                                            Among its related pathways are NF-KappaB Family Pathway and Diseases associated with the TLR signaling cascade.                                            Gene Ontology (GO) annotations related to this gene include cytokine activity and interleukin-1 receptor binding.                                            
## 7                                                                                        IFNB1 (Interferon Beta 1) is a Protein Coding gene.                                            Diseases associated with IFNB1 include Secondary Progressive Multiple Sclerosis and Primary Progressive Multiple Sclerosis.                                            Among its related pathways are all-trans-Retinoic Acid Mediated Apoptosis and Tuberculosis.                                            Gene Ontology (GO) annotations related to this gene include cytokine activity and type I interferon receptor binding.                                            An important paralog of this gene is IFNA2.
## 8                                                                                                                              CXCL1 (C-X-C Motif Chemokine Ligand 1) is a Protein Coding gene.                                            Diseases associated with CXCL1 include Helicobacter Pylori Infection and Bacterial Meningitis.                                            Among its related pathways are Signaling by GPCR and PEDF Induced Signaling.                                            Gene Ontology (GO) annotations related to this gene include signaling receptor binding and chemokine activity.                                            An important paralog of this gene is CXCL2.
## 9                                                                                                                                                                                                                                                                                                                                                                     MN1 (MN1 Proto-Oncogene, Transcriptional Regulator) is a Protein Coding gene.                                            Diseases associated with MN1 include Cebalid Syndrome and Meningioma, Familial.                                                                                                                                    
## 10                                                                                       CCL2 (C-C Motif Chemokine Ligand 2) is a Protein Coding gene.                                            Diseases associated with CCL2 include Neural Tube Defects and Human Immunodeficiency Virus Type 1.                                            Among its related pathways are G-protein signaling Ras family GTPases in kinase cascades (scheme) and Folate Metabolism.                                            Gene Ontology (GO) annotations related to this gene include protein kinase activity and heparin binding.                                            An important paralog of this gene is CCL7.
## 11                                                                                                                                                                                                                  TPPP3 (Tubulin Polymerization Promoting Protein Family Member 3) is a Protein Coding gene.                                            Diseases associated with TPPP3 include Cerebrospinal Fluid Leak and Creutzfeldt-Jakob Disease.                                                                                        Gene Ontology (GO) annotations related to this gene include tubulin binding.                                            An important paralog of this gene is TPPP.
## 12                                                                                                        DSP (Desmoplakin) is a Protein Coding gene.                                            Diseases associated with DSP include Skin Fragility-Woolly Hair Syndrome and Cardiomyopathy, Dilated, With Woolly Hair And Keratoderma.                                            Among its related pathways are Cytoskeleton remodeling Neurofilaments and Keratinization.                                            Gene Ontology (GO) annotations related to this gene include structural constituent of cytoskeleton.                                            An important paralog of this gene is PLEC.
## 13                                                                  SERPING1 (Serpin Family G Member 1) is a Protein Coding gene.                                            Diseases associated with SERPING1 include Angioedema, Hereditary, Type I and Complement Component 4, Partial Deficiency Of.                                            Among its related pathways are Formation of Fibrin Clot (Clotting Cascade) and Cell adhesion_Plasmin signaling.                                            Gene Ontology (GO) annotations related to this gene include serine-type endopeptidase inhibitor activity.                                            An important paralog of this gene is SERPINF2.
## 14                                                                                                                                                                                                                                                                                                                                                                                                                        ANKRD22 (Ankyrin Repeat Domain 22) is a Protein Coding gene.                                                                                                                                                                                An important paralog of this gene is PLA2G6.
## 15                                                                                                                                                                                                                                                                                                                                          CNN3 (Calponin 3) is a Protein Coding gene.                                                                                                                                    Gene Ontology (GO) annotations related to this gene include actin binding and calmodulin binding.                                            An important paralog of this gene is CNN1.
## 16                                                                                                   IFI27 (Interferon Alpha Inducible Protein 27) is a Protein Coding gene.                                            Diseases associated with IFI27 include Hepatitis C Virus and Oral Leukoplakia.                                            Among its related pathways are Interferon gamma signaling and Innate Immune System.                                            Gene Ontology (GO) annotations related to this gene include RNA polymerase II activating transcription factor binding and lamin binding.                                            An important paralog of this gene is IFI27L2.
## 17                                                                                                                                                   CXCL10 (C-X-C Motif Chemokine Ligand 10) is a Protein Coding gene.                                            Diseases associated with CXCL10 include Hepatitis C and Endotheliitis.                                            Among its related pathways are Signaling by GPCR and PEDF Induced Signaling.                                            Gene Ontology (GO) annotations related to this gene include signaling receptor binding and chemokine activity.                                            An important paralog of this gene is CXCL9.
## 18                                                                                                                                                                                                                                                                                                                                                                                                                             TEX13B (Testis Expressed 13B) is a Protein Coding gene.                                                                                                                                                                                An important paralog of this gene is TEX13A.
## 19                                                                                                                                                                  MAP1S (Microtubule Associated Protein 1S) is a Protein Coding gene.                                            Diseases associated with MAP1S include Giant Axonal Neuropathy 1, Autosomal Recessive and Leigh Syndrome, French Canadian Type.                                                                                        Gene Ontology (GO) annotations related to this gene include identical protein binding and actin filament binding.                                            An important paralog of this gene is MAP1B.
## 20                                                                                                    P2RY2 (Purinergic Receptor P2Y2) is a Protein Coding gene.                                            Diseases associated with P2RY2 include Lacrimal Apparatus Disease and Cystic Fibrosis.                                            Among its related pathways are RET signaling and Signaling by GPCR.                                            Gene Ontology (GO) annotations related to this gene include G protein-coupled receptor activity and G protein-coupled purinergic nucleotide receptor activity.                                            An important paralog of this gene is P2RY4.
## 21                                                                                                                                   ALDH8A1 (Aldehyde Dehydrogenase 8 Family Member A1) is a Protein Coding gene.                                                                                        Among its related pathways are Signaling by GPCR and Tryptophan metabolism.                                            Gene Ontology (GO) annotations related to this gene include oxidoreductase activity and oxidoreductase activity, acting on the aldehyde or oxo group of donors, NAD or NADP as acceptor.                                            An important paralog of this gene is ALDH1A3.
## 22                                                                                    NUAK1 (NUAK Family Kinase 1) is a Protein Coding gene.                                            Diseases associated with NUAK1 include Endometrial Squamous Cell Carcinoma and Omphalocele.                                            Among its related pathways are Regulation of TP53 Activity and Gene Expression.                                            Gene Ontology (GO) annotations related to this gene include transferase activity, transferring phosphorus-containing groups and protein tyrosine kinase activity.                                            An important paralog of this gene is NUAK2.
## 23                                                                                                                                                                                       ZNF792 (Zinc Finger Protein 792) is a Protein Coding gene.                                            Diseases associated with ZNF792 include Spherocytosis, Type 5.                                            Among its related pathways are Gene Expression and Herpes simplex virus 1 infection.                                            Gene Ontology (GO) annotations related to this gene include nucleic acid binding.                                            An important paralog of this gene is ZNF304.
## 24                                                                                                                                                                                                                                                                                                                                             ZFY (Zinc Finger Protein Y-Linked) is a Protein Coding gene.                                            Diseases associated with ZFY include Hermaphroditism and Campomelic Dysplasia.                                                                                                                                    An important paralog of this gene is ZFX.
## 25                                                                                                                                                                         IRX3 (Iroquois Homeobox 3) is a Protein Coding gene.                                            Diseases associated with IRX3 include Transient Refractive Change and Lateral Displacement Of Eye.                                            Among its related pathways are FTO Obesity Variant Mechanism.                                            Gene Ontology (GO) annotations related to this gene include sequence-specific DNA binding.                                            An important paralog of this gene is IRX1.
## 26                                                                                                                                                                        EIF1AY (Eukaryotic Translation Initiation Factor 1A Y-Linked) is a Protein Coding gene.                                                                                        Among its related pathways are Translation Factors and Apoptotic Pathways in Synovial Fibroblasts.                                            Gene Ontology (GO) annotations related to this gene include RNA binding and translation initiation factor activity.                                            An important paralog of this gene is EIF1AX.
## 27                                                                                                                                                                                                                                                                                                                     PRKY (Protein Kinase Y-Linked (Pseudogene)) is a Pseudogene.                                                                                                                                    Gene Ontology (GO) annotations related to this gene include protein serine/threonine kinase activity.                                            An important paralog of this gene is PRKX.
## 28                                                                                                  DDX3Y (DEAD-Box Helicase 3 Y-Linked) is a Protein Coding gene.                                            Diseases associated with DDX3Y include Spermatogenic Failure, Y-Linked, 2 and Partial Deletion Of Y.                                            Among its related pathways are RIG-I/MDA5 mediated induction of IFN-alpha/beta pathways.                                            Gene Ontology (GO) annotations related to this gene include nucleic acid binding and ATP-dependent RNA helicase activity.                                            An important paralog of this gene is DDX3X.
## 29                                                                                                                     USP9Y (Ubiquitin Specific Peptidase 9 Y-Linked) is a Protein Coding gene.                                            Diseases associated with USP9Y include Spermatogenic Failure, Y-Linked, 2 and Partial Deletion Of Y.                                            Among its related pathways are Ubiquitin-Proteasome Dependent Proteolysis.                                            Gene Ontology (GO) annotations related to this gene include binding and cysteine-type peptidase activity.                                            An important paralog of this gene is USP9X.
## 30                                                                            RPS4Y1 (Ribosomal Protein S4 Y-Linked 1) is a Protein Coding gene.                                            Diseases associated with RPS4Y1 include Turner Syndrome.                                            Among its related pathways are Activation of the mRNA upon binding of the cap-binding complex and eIFs, and subsequent binding to 43S and rRNA processing in the nucleus and cytosol.                                            Gene Ontology (GO) annotations related to this gene include RNA binding and rRNA binding.                                            An important paralog of this gene is RPS4Y2.
## 31                                             UGT2B17 (UDP Glucuronosyltransferase Family 2 Member B17) is a Protein Coding gene.                                            Diseases associated with UGT2B17 include Bone Mineral Density Quantitative Trait Locus 12 and Osteoporosis.                                            Among its related pathways are Drug metabolism - cytochrome P450 and Cytochrome P450 - arranged by substrate type.                                            Gene Ontology (GO) annotations related to this gene include carbohydrate binding and glucuronosyltransferase activity.                                            An important paralog of this gene is UGT2B15.
## 32                                                                                 UTY (Ubiquitously Transcribed Tetratricopeptide Repeat Containing, Y-Linked) is a Protein Coding gene.                                                                                        Among its related pathways are Chromatin organization and Activated PKN1 stimulates transcription of AR (androgen receptor) regulated genes KLK2 and KLK3.                                            Gene Ontology (GO) annotations related to this gene include dioxygenase activity and histone demethylase activity (H3-K27 specific).                                            An important paralog of this gene is KDM6A.
## 33                                                                                                   MME (Membrane Metalloendopeptidase) is a Protein Coding gene.                                            Diseases associated with MME include Charcot-Marie-Tooth Disease, Axonal, Type 2T and Spinocerebellar Ataxia 43.                                            Among its related pathways are Collagen chain trimerization and Peptide hormone metabolism.                                            Gene Ontology (GO) annotations related to this gene include peptidase activity and endopeptidase activity.                                            An important paralog of this gene is MMEL1.
## 34                                                                                                                                                              ANXA3 (Annexin A3) is a Protein Coding gene.                                            Diseases associated with ANXA3 include Ovarian Cancer and Prostate Cancer.                                            Among its related pathways are Prostaglandin Synthesis and Regulation.                                            Gene Ontology (GO) annotations related to this gene include calcium ion binding and calcium-dependent phospholipid binding.                                            An important paralog of this gene is ANXA4.
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     UniProtKB_Summary
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      Cytokine with a wide variety of biological functions. It is a potent inducer of the acute phase response. Plays an essential role in the final differentiation of B-cells into Ig-secreting cells Involved in lymphocyte and monocyte differentiation. Acts on B-cells, T-cells, hepatocytes, hematopoietic progenitor cells and cells of the CNS. Required for the generation of T(H)17 cells. Also acts as a myokine. It is discharged into the bloodstream after muscle contraction and acts to increase the breakdown of fats and to improve insulin resistance. It induces myeloma and plasmacytoma growth and induces nerve cells differentiation.\n                         IL6_HUMAN,P05231\n                         
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       Cytokine that can act as a growth factor for activated T and NK cells, enhance the lytic activity of NK/lymphokine-activated killer cells, and stimulate the production of IFN-gamma by resting PBMC.\n                         IL12B_HUMAN,P29460\n                         
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    Hydrolyzes the ester bond at the sn-1 position of glycerophospholipids and produces 2-acyl lysophospholipids. Hydrolyzes phosphatidylserine (PS) in the form of liposomes and 1-acyl-2 lysophosphatidylserine (lyso-PS), but not triolein, phosphatidylcholine (PC), phosphatidylethanolamine (PE), phosphatidic acid (PA) or phosphatidylinositol (PI). Isoform 2 hydrolyzes lyso-PS but not PS. Hydrolysis of lyso-PS in peritoneal mast cells activated by receptors for IgE leads to stimulate histamine production.\n                         PLA1A_HUMAN,Q53H76\n                         
## 4  Acts as a ligand for C-C chemokine receptor CCR6. Signals through binding and activation of CCR6 and induces a strong chemotactic response and mobilization of intracellular calcium ions (PubMed:11352563, PubMed:11035086, PubMed:20068036). The ligand-receptor pair CCL20-CCR6 is responsible for the chemotaxis of dendritic cells (DC), effector/memory T-cells and B-cells and plays an important role at skin and mucosal surfaces under homeostatic and inflammatory conditions, as well as in pathology, including cancer and various autoimmune diseases (PubMed:21376174). CCL20 acts as a chemotactic factor that attracts lymphocytes and, slightly, neutrophils, but not monocytes (PubMed:9038201, PubMed:11352563). Involved in the recruitment of both the proinflammatory IL17 producing helper T-cells (Th17) and the regulatory T-cells (Treg) to sites of inflammation. Required for optimal migration of thymic natural regulatory T cells (nTregs) and DN1 early thymocyte progenitor cells (By similarity). C-terminal processed forms have been shown to be equally chemotactically active for leukocytes (PubMed:11035086). Positively regulates sperm motility and chemotaxis via its binding to CCR6 which triggers Ca2+ mobilization in the sperm which is important for its motility (PubMed:23765988, PubMed:25122636). Inhibits proliferation of myeloid progenitors in colony formation assays (PubMed:9129037). May be involved in formation and function of the mucosal lymphoid tissues by attracting lymphocytes and dendritic cells towards epithelial cells (By similarity). Possesses antibacterial activity towards E.coli ATCC 25922 and S.aureus ATCC 29213 (PubMed:12149255).\n                         CCL20_HUMAN,P78556\n                         
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    Ligand for CXCR2 (By similarity). Has chemotactic activity for neutrophils. May play a role in inflammation and exert its effects on endothelial cells in an autocrine fashion. In vitro, the processed form GRO-gamma(5-73) shows a fivefold higher chemotactic activity for neutrophilic granulocytes.\n                         CXCL3_HUMAN,P19876\n                         
## 6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Produced by activated macrophages, IL-1 stimulates thymocyte proliferation by inducing IL-2 release, B-cell maturation and proliferation, and fibroblast growth factor activity. IL-1 proteins are involved in the inflammatory response, being identified as endogenous pyrogens, and are reported to stimulate the release of prostaglandin and collagenase from synovial cells.\n                         IL1A_HUMAN,P01583\n                         
## 7                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      Has antiviral, antibacterial and anticancer activities.\n                         IFNB_HUMAN,P01574\n                         
## 8                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                Has chemotactic activity for neutrophils. May play a role in inflammation and exerts its effects on endothelial cells in an autocrine fashion. In vitro, the processed forms GRO-alpha(4-73), GRO-alpha(5-73) and GRO-alpha(6-73) show a 30-fold higher chemotactic activity.\n                         GROA_HUMAN,P09341\n                         
## 9                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Transcriptional activator which specifically regulates expression of TBX22 in the posterior region of the developing palate. Required during later stages of palate development for growth and medial fusion of the palatal shelves. Promotes maturation and normal function of calvarial osteoblasts, including expression of the osteoclastogenic cytokine TNFSF11/RANKL. Necessary for normal development of the membranous bones of the skull (By similarity). May play a role in tumor suppression (Probable).\n                         MN1_HUMAN,Q10571\n                         
## 10                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     Acts as a ligand for C-C chemokine receptor CCR2 (PubMed:9837883, PubMed:10587439, PubMed:10529171). Signals through binding and activation of CCR2 and induces a strong chemotactic response and mobilization of intracellular calcium ions (PubMed:9837883, PubMed:10587439). Exhibits a chemotactic activity for monocytes and basophils but not neutrophils or eosinophils (PubMed:8627182, PubMed:9792674, PubMed:8195247). May be involved in the recruitment of monocytes into the arterial wall during the disease process of atherosclerosis (PubMed:8107690).\n                         CCL2_HUMAN,P13500\n                         
## 11                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     Regulator of microtubule dynamic that has microtubule bundling activity (PubMed:17105200, PubMed:19633818). Required for embryo implantation; possibly by regulating beta-catenin (By similarity). Also required for decidualization via regulation of beta-catenin (PubMed:30667362).\n                         TPPP3_HUMAN,Q9BW30\n                         
## 12                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    Major high molecular weight protein of desmosomes. Involved in the organization of the desmosomal cadherin-plakoglobin complexes into discrete plasma membrane domains and in the anchoring of intermediate filaments to the desmosomes.\n                         DESP_HUMAN,P15924\n                         
## 13                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      Activation of the C1 complex is under control of the C1-inhibitor. It forms a proteolytically inactive stoichiometric complex with the C1r or C1s proteases. May play a potentially crucial role in regulating important physiological pathways including complement activation, blood coagulation, fibrinolysis and the generation of kinins. Very efficient inhibitor of FXIIa. Inhibits chymotrypsin and kallikrein.\n                         IC1_HUMAN,P05155\n                         
## 14                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         no summary
## 15                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            Thin filament-associated protein that is implicated in the regulation and modulation of smooth muscle contraction. It is capable of binding to actin, calmodulin, troponin C and tropomyosin. The interaction of calponin with actin inhibits the actomyosin Mg-ATPase activity.\n                         CNN3_HUMAN,Q15417\n                         
## 16                                                                                                                                                                                                                                                                                                                                                                                                                                                        Probable adapter protein involved in different biological processes (PubMed:22427340, PubMed:27194766). Part of the signaling pathways that lead to apoptosis (PubMed:18330707, PubMed:27673746, PubMed:24970806). Involved in type-I interferon-induced apoptosis characterized by a rapid and robust release of cytochrome C from the mitochondria and activation of BAX and caspases 2, 3, 6, 8 and 9 (PubMed:18330707, PubMed:27673746). Also functions in TNFSF10-induced apoptosis (PubMed:24970806). May also have a function in the nucleus, where it may be involved in the interferon-induced negative regulation of the transcriptional activity of NR4A1, NR4A2 and NR4A3 through the enhancement of XPO1-mediated nuclear export of these nuclear receptors (PubMed:22427340). May thereby play a role in the vascular response to injury (By similarity). In the innate immune response, has an antiviral activity towards hepatitis C virus/HCV (PubMed:27194766, PubMed:27777077). May prevent the replication of the virus by recruiting both the hepatitis C virus non-structural protein 5A/NS5A and the ubiquitination machinery via SKP2, promoting the ubiquitin-mediated proteasomal degradation of NS5A (PubMed:27194766, PubMed:27777077).\n                         IFI27_HUMAN,P40305\n                         
## 17                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         Pro-inflammatory cytokine that is involved in a wide variety of processes such as chemotaxis, differentiation, and activation of peripheral immune cells, regulation of cell growth, apoptosis and modulation of angiostatic effects (PubMed:7540647, PubMed:11157474, PubMed:22652417). Plays thereby an important role during viral infections by stimulating the activation and migration of immune cells to the infected sites (By similarity). Mechanistically, binding of CXCL10 to the CXCR3 receptor activates G protein-mediated signaling and results in downstream activation of phospholipase C-dependent pathway, an increase in intracellular calcium production and actin reorganization (PubMed:12750173, PubMed:19151743). In turn, recruitment of activated Th1 lymphocytes occurs at sites of inflammation (PubMed:12750173, PubMed:12663757). Activation of the CXCL10/CXCR3 axis plays also an important role in neurons in response to brain injury for activating microglia, the resident macrophage population of the central nervous system, and directing them to the lesion site. This recruitment is an essential element for neuronal reorganization (By similarity).\n                         CXL10_HUMAN,P02778\n                         
## 18                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         no summary
## 19                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                Microtubule-associated protein that mediates aggregation of mitochondria resulting in cell death and genomic destruction (MAGD). Plays a role in anchoring the microtubule organizing center to the centrosomes. Binds to DNA. Plays a role in apoptosis. Involved in the formation of microtubule bundles (By similarity).\n                         MAP1S_HUMAN,Q66K74\n                         
## 20                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              Receptor for ATP and UTP coupled to G-proteins that activate a phosphatidylinositol-calcium second messenger system. The affinity range is UTP = ATP > ATP-gamma-S >> 2-methylthio-ATP = ADP.\n                         P2RY2_HUMAN,P41231\n                         
## 21                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      Catalyzes the NAD-dependent oxidation of 2-aminomuconic semialdehyde of the kynurenine metabolic pathway in L-tryptophan degradation.\n                         AL8A1_HUMAN,Q9H2A2\n                         
## 22                                                                                                                                                                                                                                                                      Serine/threonine-protein kinase involved in various processes such as cell adhesion, regulation of cell ploidy and senescence, cell proliferation and tumor progression. Phosphorylates ATM, CASP6, LATS1, PPP1R12A and p53/TP53. Acts as a regulator of cellular senescence and cellular ploidy by mediating phosphorylation of 'Ser-464' of LATS1, thereby controlling its stability. Controls cell adhesion by regulating activity of the myosin protein phosphatase 1 (PP1) complex. Acts by mediating phosphorylation of PPP1R12A subunit of myosin PP1: phosphorylated PPP1R12A then interacts with 14-3-3, leading to reduced dephosphorylation of myosin MLC2 by myosin PP1. May be involved in DNA damage response: phosphorylates p53/TP53 at 'Ser-15' and 'Ser-392' and is recruited to the CDKN1A/WAF1 promoter to participate in transcription activation by p53/TP53. May also act as a tumor malignancy-associated factor by promoting tumor invasion and metastasis under regulation and phosphorylation by AKT1. Suppresses Fas-induced apoptosis by mediating phosphorylation of CASP6, thereby suppressing the activation of the caspase and the subsequent cleavage of CFLAR. Regulates UV radiation-induced DNA damage response mediated by CDKN1A. In association with STK11, phosphorylates CDKN1A in response to UV radiation and contributes to its degradation which is necessary for optimal DNA repair (PubMed:25329316).\n                         NUAK1_HUMAN,O60285\n                         
## 23                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             May be involved in transcriptional regulation.\n                         ZN792_HUMAN,Q3KQV3\n                         
## 24                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            Probable transcriptional activator. Binds to the consensus sequence 5'-AGGCCY-3'.\n                         ZFY_HUMAN,P08048\n                         
## 25                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                Transcription factor involved in SHH-dependent neural patterning. Together with NKX2-2 and NKX6-1 acts to restrict the generation of motor neurons to the appropriate region of the neural tube. Belongs to the class I proteins of neuronal progenitor factors, which are repressed by SHH signals. Involved in the transcriptional repression of MNX1 in non-motor neuron cells. Acts as a regulator of energy metabolism.\n                         IRX3_HUMAN,P78415\n                         
## 26                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            Seems to be required for maximal rate of protein biosynthesis. Enhances ribosome dissociation into subunits and stabilizes the binding of the initiator Met-tRNA(I) to 40 S ribosomal subunits (By similarity).\n                         IF1AY_HUMAN,O14602\n                         
## 27                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         no summary
## 28                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Probable ATP-dependent RNA helicase. During immune response, may enhance IFNB1 expression via IRF3/IRF7 pathway (By similarity).\n                         DDX3Y_HUMAN,O15523\n                         
## 29                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             May function as a ubiquitin-protein or polyubiquitin hydrolase involved both in the processing of ubiquitin precursors and of ubiquitinated proteins. May therefore play an important regulatory role at the level of protein turnover by preventing degradation of proteins through the removal of conjugated ubiquitin. Essential component of TGF-beta/BMP signaling cascade. Deubiquitinates monoubiquitinated SMAD4, opposing the activity of E3 ubiquitin-protein ligase TRIM33. Monoubiquitination of SMAD4 hampers its ability to form a stable complex with activated SMAD2/3 resulting in inhibition of TGF-beta/BMP signaling cascade. Deubiquitination of SMAD4 by USP9X re-empowers its competence to mediate TGF-beta signaling (By similarity).\n                         USP9Y_HUMAN,O00507\n                         
## 30                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         no summary
## 31                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           UDPGT is of major importance in the conjugation and subsequent elimination of potentially toxic xenobiotics and endogenous compounds. The major substrates of this isozyme are eugenol > 4-methylumbelliferone > dihydrotestosterone (DHT) > androstane-3-alpha,17-beta-diol (3-alpha-diol) > testosterone > androsterone (ADT).\n                         UDB17_HUMAN,O75795\n                         
## 32                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              Male-specific histone demethylase that catalyzes trimethylated 'Lys-27' (H3K27me3) demethylation in histone H3. Has relatively low lysine demethylase activity.\n                         UTY_HUMAN,O14607\n                         
## 33                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            Thermolysin-like specificity, but is almost confined on acting on polypeptides of up to 30 amino acids (PubMed:15283675, PubMed:8168535). Biologically important in the destruction of opioid peptides such as Met- and Leu-enkephalins by cleavage of a Gly-Phe bond (PubMed:17101991). Able to cleave angiotensin-1, angiotensin-2 and angiotensin 1-9 (PubMed:15283675). Involved in the degradation of atrial natriuretic factor (ANF) (PubMed:2531377, PubMed:2972276). Displays UV-inducible elastase activity toward skin preelastic and elastic fibers (PubMed:20876573).\n                         NEP_HUMAN,P08473\n                         
## 34                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       Inhibitor of phospholipase A2, also possesses anti-coagulant properties. Also cleaves the cyclic bond of inositol 1,2-cyclic phosphate to form inositol 1-phosphate.\n                         ANXA3_HUMAN,P12429\n                         
##                  todaysDate
## 1  Thu Jul 30 10:51:03 2020
## 2  Thu Jul 30 10:51:04 2020
## 3  Thu Jul 30 10:51:06 2020
## 4  Thu Jul 30 10:51:08 2020
## 5  Thu Jul 30 10:51:09 2020
## 6  Thu Jul 30 10:51:11 2020
## 7  Thu Jul 30 10:51:14 2020
## 8  Thu Jul 30 10:51:16 2020
## 9  Thu Jul 30 10:51:17 2020
## 10 Thu Jul 30 10:51:19 2020
## 11 Thu Jul 30 10:51:21 2020
## 12 Thu Jul 30 10:51:22 2020
## 13 Thu Jul 30 10:51:23 2020
## 14 Thu Jul 30 10:51:24 2020
## 15 Thu Jul 30 10:51:26 2020
## 16 Thu Jul 30 10:51:27 2020
## 17 Thu Jul 30 10:51:28 2020
## 18 Thu Jul 30 10:51:30 2020
## 19 Thu Jul 30 10:51:31 2020
## 20 Thu Jul 30 10:51:32 2020
## 21 Thu Jul 30 10:51:34 2020
## 22 Thu Jul 30 10:51:36 2020
## 23 Thu Jul 30 10:51:38 2020
## 24 Thu Jul 30 10:51:39 2020
## 25 Thu Jul 30 10:51:40 2020
## 26 Thu Jul 30 10:51:41 2020
## 27 Thu Jul 30 10:51:42 2020
## 28 Thu Jul 30 10:51:44 2020
## 29 Thu Jul 30 10:51:45 2020
## 30 Thu Jul 30 10:51:46 2020
## 31 Thu Jul 30 10:51:47 2020
## 32 Thu Jul 30 10:51:49 2020
## 33 Thu Jul 30 10:51:50 2020
## 34 Thu Jul 30 10:51:52 2020

genecards data grab

Janis Corona

7/30/2020