The 53-gene “CTRA” (conserved transcriptional response to adversity) set listed below comprises (a) 19 proinflammatory genes, which are up-regulated “on average” in the CTRA; (b) 31 genes involved in type I interferon (IFN) responses, which are down-regulated in the CTRA; and (c) 3 genes involved in antibody synthesis, which are also down-regulated in the CTRA. These genes have historically been designated by their HGNC (HUGO Gene Nomenclature Committee) names.

# CTRA gene sets, written with the historical HGNC symbols.
# NOTE: the variable name `inflamatory` (sic) is kept as-is because later
# chunks refer to it.

# Pro-inflammatory component (19 genes).
inflamatory <- c(
  "IL1A", "IL1B", "IL6", "IL8", "TNF", "PTGS1", "PTGS2",
  "FOS", "FOSB", "FOSL1", "FOSL2", "JUN", "JUNB", "JUND",
  "NFKB1", "NFKB2", "REL", "RELA", "RELB"
)

# Type I interferon response component (31 genes).
interferonTypeI <- c(
  "GBP1", "IFI16", "IFI27", "IFI27L1", "IFI27L2", "IFI30",
  "IFI35", "IFI44", "IFI44L", "IFI6", "IFIH1", "IFIT1",
  "IFIT2", "IFIT3", "IFIT5", "IFIT1L", "IFITM1", "IFITM2",
  "IFITM3", "IFITM4P", "IFITM5", "IFNB1", "IRF2", "IRF7",
  "IRF8", "MX1", "MX2", "OAS1", "OAS2", "OAS3", "OASL"
)

# Antibody synthesis component (3 genes).
antibody <- c("IGJ", "IGLL1", "IGLL3")

# Full 53-gene set: inflammatory, then interferon, then antibody genes.
ctra0 <- c(inflamatory, interferonTypeI, antibody)

# The 12-gene `ctraCore0` subset, written with the historical HGNC symbols.
ctraCore0 <- c(
  "IRF7", "JUN", "IGJ", "IL8", "IL1B", "FOSB",
  "FOSL2", "IFIT3", "IFI35", "IFI44L", "MX1", "OAS2"
)

# Same genes in the same order, with the two renamed symbols updated
# (IGJ -> JCHAIN, IL8 -> CXCL8).
ctraCore <- ctraCore0
ctraCore[ctraCore == "IGJ"] <- "JCHAIN"
ctraCore[ctraCore == "IL8"] <- "CXCL8"

To connect our data to previous CTRA publications, we must relate three sets of gene identifiers: names according to the HGNC (as above), identifiers used by the Illumina Human HT-12 v4 BeadArray (used for the original CTRA publications), and identifiers used by our own HuGene 2.0 ST array v1. To do this, we queried the Ensembl Genes 85 database (Homo sapiens, archived in July 2016) via the biomaRt interface.

# Package setup and Ensembl connection used by every query below.
# The attach order is kept as-is: packages attached later mask same-named
# functions from packages attached earlier.
library("dplyr")
# Raise dplyr's row-print limit so that query results are printed in full.
options(dplyr.print_max = 1e9)
library("Biobase")
eData   <- exprs # convenience alias for `exprs` (presumably Biobase's accessor -- confirm); not used in this section
library("biomaRt")
# Connect to the human gene dataset on the July 2016 Ensembl archive host,
# so that lookups are made against a fixed release rather than the current one.
ensembl <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", 
                   dataset = "hsapiens_gene_ensembl", 
                   host = "jul2016.archive.ensembl.org")

First, note that 4 of the 53 HGNC names have been replaced since the original CTRA publications; these are the symbols for which the Ensembl query below returns no match.

# Look up the original CTRA symbols in Ensembl. Only symbols that Ensembl
# recognises come back in the result table.
a <- tbl_df(getBM(
  attributes = "hgnc_symbol",
  filters    = "hgnc_symbol",
  values     = ctra0,
  mart       = ensembl
))

# Original symbols with no match in this Ensembl release (printed below).
missing <- setdiff(ctra0, a$hgnc_symbol)
missing
## [1] "IL8"    "IFIT1L" "IGJ"    "IGLL3"

We will therefore use the newer names (confirming they are all present).

# Swap each retired HGNC symbol for its current name, in place, so that the
# position of every gene within its set is unchanged.
inflamatory[inflamatory == "IL8"]            <- "CXCL8"
interferonTypeI[interferonTypeI == "IFIT1L"] <- "IFIT1B"
antibody[antibody == "IGJ"]                  <- "JCHAIN"
antibody[antibody == "IGLL3"]                <- "IGLL3P"

# Updated 53-gene set, in the same order as `ctra0`.
ctra <- c(inflamatory, interferonTypeI, antibody)

# Repeat the Ensembl lookup with the updated symbols.
a <- tbl_df(getBM(
  attributes = "hgnc_symbol",
  filters    = "hgnc_symbol",
  values     = ctra,
  mart       = ensembl
))

# Prints TRUE only when every updated symbol was returned by Ensembl.
all(ctra %in% a$hgnc_symbol)
## [1] TRUE

Oddly, Illumina Human HT-12 v4 BeadArrays do not assay 2 of 53 CTRA genes, according to the Ensembl Genes 85 database (NAs below).

# Spot check: fetch the Illumina HT-12 v4 probe IDs that Ensembl lists for
# IFI30 and IFIH1. An NA in the probe column means Ensembl lists no probe.
# The result is stored under a descriptive name rather than `c`, so that
# base::c() is not shadowed in the global environment.
illumina_check <- getBM(
  attributes = c("hgnc_symbol", "illumina_humanht_12_v4"),
  filters    = "hgnc_symbol",
  values     = c("IFI30", "IFIH1"),
  mart       = ensembl
)
illumina_check
##   hgnc_symbol illumina_humanht_12_v4
## 1       IFI30                     NA
## 2       IFIH1                     NA

Confirm that IFI30 and IFIH1 are the only two genes not assayed by Illumina Human HT-12 v4 BeadArrays (“illumina_humanht_12_v4”).

# Map every CTRA symbol to its Illumina HT-12 v4 probe IDs.
a <- tbl_df(getBM(
  attributes = c("hgnc_symbol", "illumina_humanht_12_v4"),
  filters    = "hgnc_symbol",
  values     = ctra,
  mart       = ensembl
))

# Recode empty-string probe IDs as NA so that "no probe" has one representation.
a <- mutate(
  a,
  illumina_humanht_12_v4 = replace(
    illumina_humanht_12_v4,
    illumina_humanht_12_v4 == "",
    NA
  )
)

# Keep only rows that carry a probe ID.
b <- filter(a, !is.na(illumina_humanht_12_v4))

# CTRA genes with no Illumina probe at all (printed below).
missing_ctra <- setdiff(ctra, b$hgnc_symbol)
missing_ctra
## [1] "IFI30" "IFIH1"

This result should be confirmed against Illumina's own product information for the array.

Similarly, according to the Ensembl Genes 85 database, our HuGene 2.0 ST array v1 (“affy_hugene_2_0_st_v1”) does not assay 3 of the 53 CTRA genes.

# Map every CTRA symbol to its HuGene 2.0 ST v1 probe IDs.
a <- getBM(attributes = c("hgnc_symbol", "affy_hugene_2_0_st_v1"),
           filters = "hgnc_symbol",
           values = ctra,
           mart = ensembl) %>% tbl_df

# Keep only rows that carry a probe ID. A missing probe is treated as either
# NA or an empty string, matching the Illumina check, so that an empty-string
# ID is never counted as "assayed". Comparing a numeric column with "" is
# harmless: real IDs compare as not equal and NA rows are dropped by filter().
b <- a %>%
  filter(!is.na(affy_hugene_2_0_st_v1), affy_hugene_2_0_st_v1 != "")

# CTRA genes with no HuGene probe at all (printed below).
(missing_ctra = setdiff(ctra, b$hgnc_symbol))
## [1] "GBP1"   "MX2"    "IGLL3P"
# CTRA genes that remain once those in `missing_ctra` are removed (printed below).
found_ctra <- setdiff(ctra, missing_ctra)
found_ctra
##  [1] "IL1A"    "IL1B"    "IL6"     "CXCL8"   "TNF"     "PTGS1"   "PTGS2"  
##  [8] "FOS"     "FOSB"    "FOSL1"   "FOSL2"   "JUN"     "JUNB"    "JUND"   
## [15] "NFKB1"   "NFKB2"   "REL"     "RELA"    "RELB"    "IFI16"   "IFI27"  
## [22] "IFI27L1" "IFI27L2" "IFI30"   "IFI35"   "IFI44"   "IFI44L"  "IFI6"   
## [29] "IFIH1"   "IFIT1"   "IFIT2"   "IFIT3"   "IFIT5"   "IFIT1B"  "IFITM1" 
## [36] "IFITM2"  "IFITM3"  "IFITM4P" "IFITM5"  "IFNB1"   "IRF2"    "IRF7"   
## [43] "IRF8"    "MX1"     "OAS1"    "OAS2"    "OAS3"    "OASL"    "JCHAIN" 
## [50] "IGLL1"
# Record the R version, platform and package versions used for this run.
sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-apple-darwin15.5.0 (64-bit)
## Running under: OS X 10.11.6 (El Capitan)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] biomaRt_2.30.0      Biobase_2.34.0      BiocGenerics_0.20.0
## [4] dplyr_0.5.0        
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.8          AnnotationDbi_1.36.0 knitr_1.15.1        
##  [4] magrittr_1.5         IRanges_2.8.1        R6_2.2.0            
##  [7] stringr_1.1.0        tools_3.3.1          DBI_0.5-1           
## [10] htmltools_0.3.5      lazyeval_0.2.0       yaml_2.1.14         
## [13] assertthat_0.1       rprojroot_1.1        digest_0.6.10       
## [16] tibble_1.2           S4Vectors_0.12.0     bitops_1.0-6        
## [19] RCurl_1.95-4.8       evaluate_0.10        RSQLite_1.0.0       
## [22] rmarkdown_1.2        stringi_1.1.2        backports_1.0.4     
## [25] stats4_3.3.1         XML_3.98-1.5