Using the top genes from Lyme disease, Multiple Sclerosis, Fibromyalgia, and Epstein-Barr Virus for a machine learning model. All fold change values were verified to be pathology over controls (healthy). Some data frames have the Ensembl gene ID but not the GeneCards ID, and some the reverse.

# Load the four per-pathology top-gene tables. Empty strings, a single space,
# "na" and "NA" are all treated as missing values. TRUE is spelled out because
# the `T` alias is an ordinary variable that can be reassigned.
MS_barcode_genes <- read.csv('MS_top41genes_AAs_both_cDNA_mRNA.csv', sep = ',',
                             header = TRUE, na.strings = c('', ' ', 'na', 'NA'))
Lyme_genes33 <- read.csv('top33genes100vars_Lyme_ChrSumms.csv', sep = ',',
                         header = TRUE, na.strings = c('', ' ', 'na', 'NA'))
FM_genes15 <- read.csv('top15DF_Fibromyalgia_all.csv', sep = ',',
                       header = TRUE, na.strings = c('', ' ', 'na', 'NA'))
EBV_genes80 <- read.csv('top80_EBV_patient_vs_control_noStim.csv', sep = ',',
                        header = TRUE, na.strings = c('', ' ', 'na', 'NA'))
# Show the EBV column layout.
colnames(EBV_genes80)
##  [1] "Geneid"                      "LCL_P1_1_Stim_IL27"         
##  [3] "LCL_P1_1_Basal"              "LCL_P2_Stim_IL27"           
##  [5] "LCL_P2_Basal"                "LCL_Ctrl_1_Stim_IL27"       
##  [7] "LCL_Ctrl_1_Basal"            "LCL_Ctrl_2_Stim_IL27"       
##  [9] "LCL_Ctrl_2_Basal"            "sampleSumOfCounts"          
## [11] "basalcontrolMeans"           "basalPatientMeans"          
## [13] "foldchange_pt_ctrl_not_stim" "controlMeans_IL27_stim"     
## [15] "patientMeans_IL27_stim"      "foldchange_pt_ctrol_stim"
# Ensembl gene IDs of the EBV top genes (the "Geneid" column), then display them.
EBV_ensembl <- EBV_genes80[["Geneid"]]
print(EBV_ensembl)
##  [1] "ENSG00000211899" "ENSG00000164458" "ENSG00000211644" "ENSG00000163600"
##  [5] "ENSG00000137801" "ENSG00000070031" "ENSG00000084710" "ENSG00000128510"
##  [9] "ENSG00000255026" "ENSG00000115380" "ENSG00000142794" "ENSG00000124507"
## [13] "ENSG00000175329" "ENSG00000224769" "ENSG00000139364" "ENSG00000125869"
## [17] "ENSG00000101307" "ENSG00000276476" "ENSG00000115009" "ENSG00000197632"
## [21] "ENSG00000197057" "ENSG00000148408" "ENSG00000268758" "ENSG00000172380"
## [25] "ENSG00000232977" "ENSG00000205086" "ENSG00000185304" "ENSG00000179144"
## [29] "ENSG00000151320" "ENSG00000160183" "ENSG00000125810" "ENSG00000151640"
## [33] "ENSG00000163053" "ENSG00000264781" "ENSG00000181847" "ENSG00000149948"
## [37] "ENSG00000003096" "ENSG00000261471" "ENSG00000017427" "ENSG00000274276"
## [41] "ENSG00000141756" "ENSG00000134874" "ENSG00000185477" "ENSG00000224318"
## [45] "ENSG00000174946" "ENSG00000241935" "ENSG00000112139" "ENSG00000228065"
## [49] "ENSG00000144824" "ENSG00000092607" "ENSG00000120885" "ENSG00000196739"
## [53] "ENSG00000129204" "ENSG00000137251" "ENSG00000133863" "ENSG00000154556"
## [57] "ENSG00000080493" "ENSG00000223749" "ENSG00000135697" "ENSG00000174567"
## [61] "ENSG00000157110" "ENSG00000187498" "ENSG00000134769" "ENSG00000163814"
## [65] "ENSG00000203797" "ENSG00000151376" "ENSG00000129116" "ENSG00000152990"
## [69] "ENSG00000206557" "ENSG00000211897" "ENSG00000116574" "ENSG00000130508"
## [73] "ENSG00000099725" "ENSG00000164946" "ENSG00000241472" "ENSG00000270550"
## [77] "ENSG00000211893" "ENSG00000211896" "ENSG00000253755" "ENSG00000211892"
# Column layout of the fibromyalgia table.
names(FM_genes15)
##  [1] "Gene"                          "Overall"                      
##  [3] "model"                         "GeneCardsName"                
##  [5] "overallGeneSummary"            "foldchangeHealthyVsMyo"       
##  [7] "foldchangeHealthyVsMyoMedians" "Healthy1"                     
##  [9] "Healthy2"                      "Healthy3"                     
## [11] "Healthy4"                      "Healthy5"                     
## [13] "myo1"                          "myo2"                         
## [15] "myo3"                          "myo4"                         
## [17] "myo5"                          "myo6"                         
## [19] "myo7"                          "healthy_Mean"                 
## [21] "myo_Mean"                      "healthyMedian"                
## [23] "myoMedian"                     "geneCardsSummary"             
## [25] "NCBI_summary"                  "UniProt_summary"              
## [27] "overallGeneSummary.1"          "chromosomeLocation"
# Fibromyalgia identifiers: Ensembl IDs live in "Gene", gene symbols in
# "GeneCardsName". Display the symbols.
FM_ensembl <- FM_genes15[["Gene"]]
FM_genecards <- FM_genes15[["GeneCardsName"]]
print(FM_genecards)
##  [1] "CXCL2"           "CSF3"            "CH25H"           "TVP23A"         
##  [5] "TVP23A"          "HOXC10"          "HOXC10"          "HOXC10"         
##  [9] "GNAS-AS1"        "GNAS-AS1"        "LOC101928819"    "LOC101928819"   
## [13] "UPK3BL1"         "ENSG00000268292" "ENSG00000268292"
# Display the fibromyalgia Ensembl IDs.
print(FM_ensembl)
##  [1] "ENSG00000081041" "ENSG00000108342" "ENSG00000138135" "ENSG00000166676"
##  [5] "ENSG00000166676" "ENSG00000180818" "ENSG00000180818" "ENSG00000180818"
##  [9] "ENSG00000235590" "ENSG00000235590" "ENSG00000250978" "ENSG00000250978"
## [13] "ENSG00000267368" "ENSG00000268292" "ENSG00000268292"
# The Lyme table only carries gene symbols (its "Gene" column); display them.
lyme_genecards <- Lyme_genes33[["Gene"]]
print(lyme_genecards)
##  [1] "CABP1"     "CENPF"     "CLEC2L"    "CTXN3"     "CYP7B1"    "DLG3"     
##  [7] "ENO1"      "ESYT1"     "F2"        "FAM162A"   "FRS3"      "GATC"     
## [13] "HECW1"     "HPGD"      "IGFALS"    "ISG20"     "KCNJ16"    "KHDRBS3"  
## [19] "LOC400657" "MAP2K7"    "NUDT18"    "OR52A4"    "OTOS"      "PDZRN3"   
## [25] "PEX26"     "POU3F2"    "POU4F2"    "PRR24"     "PSMF1"     "RGPD3"    
## [31] "RNF168"    "SLC1A1"    "TMEM194A"
# Multiple Sclerosis identifiers: Ensembl IDs ("Ensembl_Name") and gene
# symbols ("geneSynonyms"). Display the symbols.
MS_ensembl <- MS_barcode_genes[["Ensembl_Name"]]
MS_genecards <- MS_barcode_genes[["geneSynonyms"]]
print(MS_genecards)
##  [1] "TSNARE1"    "CACNA1E"    "STX12"      "PDE4DIPP5"  "PLSCR5"    
##  [6] "CASC8"      " PIK3C2A"   "TSHZ2"      "ST8SIA4"    "IGFBP7"    
## [11] "FAM20C"     "NDUFB5P1"   "SCHLAP1"    "SLIRPP1"    "HDAC4"     
## [16] "CRLF3P3"    "LZIC"       "HSD3B1"     "AQP12B"     "CLINT1"    
## [21] "KCNA6-AS1"  "MIR4432HG"  "TMEM200A"   "ERBB4"      "ESRP1"     
## [26] "WDR35"      "ACTN4"      "NLRP3"      "POM121L15P" "SMARCA2"   
## [31] "CAMKMT"     "CDH8"       "ST3GAL3"    "TFPI"       "RNU6-280P" 
## [36] "ATP5MC3"    "RPL31P30"   "KLHL29"     "DDAH1"      "RPL7P61"   
## [41] "CPA6"
# Display the Multiple Sclerosis Ensembl IDs.
print(MS_ensembl)
##  [1] "ENSG00000171045" "ENSG00000198216" "ENSG00000117758" "ENSG00000275064"
##  [5] "ENSG00000231213" "ENSG00000246228" "ENSG00000011405" "ENSG00000182463"
##  [9] "ENSG00000113532" "ENSG00000163453" "ENSG00000177706" "ENSG00000251025"
## [13] "ENSG00000281131" "ENSG00000227505" "ENSG00000068024" "ENSG00000228225"
## [17] "ENSG00000162441" "ENSG00000203857" "ENSG00000185176" "ENSG00000113282"
## [21] "ENSG00000256988" "ENSG00000228590" "ENSG00000164484" "ENSG00000178568"
## [25] "ENSG00000104413" "ENSG00000118965" "ENSG00000130402" "ENSG00000162711"
## [29] "ENSG00000161103" "ENSG00000080503" "ENSG00000143919" "ENSG00000150394"
## [33] "ENSG00000126091" "ENSG00000003436" "ENSG00000201015" "ENSG00000154518"
## [37] "ENSG00000230702" "ENSG00000119771" "ENSG00000153904" "ENSG00000230282"
## [41] "ENSG00000165078"

The EBV fold change of the IL27-stimulated patient B-cell means over the IL27-stimulated control means (column `foldchange_pt_ctrol_stim`), with the Ensembl gene ID.

# Keep the Ensembl ID and the patient-vs-control fold change under IL27
# stimulation. Columns are selected by name rather than position (1 and 16) so
# that a change in the input layout fails loudly instead of silently picking
# the wrong column.
EBV_FC <- EBV_genes80[, c("Geneid", "foldchange_pt_ctrol_stim")]
colnames(EBV_FC) <- c("EBV_ensembl_ID", "FC_IL27Stim_cntrl")
# Tag every row with the pathology it came from.
EBV_FC$topGene_pathology <- "Epstein Barr Virus"
EBV_FC
##     EBV_ensembl_ID FC_IL27Stim_cntrl  topGene_pathology
## 1  ENSG00000211899      1.855040e+04 Epstein Barr Virus
## 2  ENSG00000164458      1.051200e+03 Epstein Barr Virus
## 3  ENSG00000211644      1.793882e+02 Epstein Barr Virus
## 4  ENSG00000163600      1.050000e+02 Epstein Barr Virus
## 5  ENSG00000137801      3.272727e+01 Epstein Barr Virus
## 6  ENSG00000070031      4.075000e+01 Epstein Barr Virus
## 7  ENSG00000084710      7.285714e+01 Epstein Barr Virus
## 8  ENSG00000128510      1.590909e+01 Epstein Barr Virus
## 9  ENSG00000255026      1.721429e+01 Epstein Barr Virus
## 10 ENSG00000115380      1.312500e+01 Epstein Barr Virus
## 11 ENSG00000142794      3.490000e+01 Epstein Barr Virus
## 12 ENSG00000124507      7.504348e+01 Epstein Barr Virus
## 13 ENSG00000175329      5.236364e+01 Epstein Barr Virus
## 14 ENSG00000224769      6.137931e+00 Epstein Barr Virus
## 15 ENSG00000139364      1.263415e+01 Epstein Barr Virus
## 16 ENSG00000125869      1.400000e+02 Epstein Barr Virus
## 17 ENSG00000101307      7.055556e+01 Epstein Barr Virus
## 18 ENSG00000276476      4.677778e+01 Epstein Barr Virus
## 19 ENSG00000115009      8.784314e+00 Epstein Barr Virus
## 20 ENSG00000197632      2.375000e+01 Epstein Barr Virus
## 21 ENSG00000197057      4.557143e+01 Epstein Barr Virus
## 22 ENSG00000148408      6.392857e+00 Epstein Barr Virus
## 23 ENSG00000268758      1.276190e+01 Epstein Barr Virus
## 24 ENSG00000172380      1.914286e+01 Epstein Barr Virus
## 25 ENSG00000232977      1.940000e+01 Epstein Barr Virus
## 26 ENSG00000205086      9.400000e+00 Epstein Barr Virus
## 27 ENSG00000185304      1.400000e+01 Epstein Barr Virus
## 28 ENSG00000179144      2.866667e+01 Epstein Barr Virus
## 29 ENSG00000151320      1.500000e+01 Epstein Barr Virus
## 30 ENSG00000160183      9.108787e+00 Epstein Barr Virus
## 31 ENSG00000125810      6.684211e+00 Epstein Barr Virus
## 32 ENSG00000151640      3.766667e+01 Epstein Barr Virus
## 33 ENSG00000163053      2.911111e+01 Epstein Barr Virus
## 34 ENSG00000264781      4.850000e+01 Epstein Barr Virus
## 35 ENSG00000181847      5.268657e+00 Epstein Barr Virus
## 36 ENSG00000149948      1.872727e+01 Epstein Barr Virus
## 37 ENSG00000003096      3.500000e+00 Epstein Barr Virus
## 38 ENSG00000261471      2.538462e+00 Epstein Barr Virus
## 39 ENSG00000017427      1.552273e+01 Epstein Barr Virus
## 40 ENSG00000274276      6.090909e+00 Epstein Barr Virus
## 41 ENSG00000141756      2.647059e-02 Epstein Barr Virus
## 42 ENSG00000134874      1.188380e-01 Epstein Barr Virus
## 43 ENSG00000185477      7.608696e-02 Epstein Barr Virus
## 44 ENSG00000224318      2.045455e-01 Epstein Barr Virus
## 45 ENSG00000174946      1.269841e-01 Epstein Barr Virus
## 46 ENSG00000241935      3.370787e-02 Epstein Barr Virus
## 47 ENSG00000112139      3.412663e-02 Epstein Barr Virus
## 48 ENSG00000228065      7.299270e-02 Epstein Barr Virus
## 49 ENSG00000144824      8.311346e-02 Epstein Barr Virus
## 50 ENSG00000092607      7.533146e-02 Epstein Barr Virus
## 51 ENSG00000120885      9.566517e-02 Epstein Barr Virus
## 52 ENSG00000196739      6.923077e-02 Epstein Barr Virus
## 53 ENSG00000129204      4.583333e-02 Epstein Barr Virus
## 54 ENSG00000137251      9.935897e-02 Epstein Barr Virus
## 55 ENSG00000133863      2.244389e-02 Epstein Barr Virus
## 56 ENSG00000154556      3.750000e-02 Epstein Barr Virus
## 57 ENSG00000080493      4.404291e-02 Epstein Barr Virus
## 58 ENSG00000223749      9.543568e-02 Epstein Barr Virus
## 59 ENSG00000135697      2.673797e-02 Epstein Barr Virus
## 60 ENSG00000174567      1.764706e-01 Epstein Barr Virus
## 61 ENSG00000157110      8.577406e-02 Epstein Barr Virus
## 62 ENSG00000187498      1.483680e-02 Epstein Barr Virus
## 63 ENSG00000134769      1.011236e-01 Epstein Barr Virus
## 64 ENSG00000163814      4.675758e-02 Epstein Barr Virus
## 65 ENSG00000203797      4.640371e-03 Epstein Barr Virus
## 66 ENSG00000151376      1.264045e-02 Epstein Barr Virus
## 67 ENSG00000129116      7.064018e-02 Epstein Barr Virus
## 68 ENSG00000152990      3.349876e-02 Epstein Barr Virus
## 69 ENSG00000206557      2.355926e-02 Epstein Barr Virus
## 70 ENSG00000211897      1.503059e-02 Epstein Barr Virus
## 71 ENSG00000116574      1.548673e-02 Epstein Barr Virus
## 72 ENSG00000130508      1.355422e-02 Epstein Barr Virus
## 73 ENSG00000099725      3.941240e-03 Epstein Barr Virus
## 74 ENSG00000164946      6.802721e-03 Epstein Barr Virus
## 75 ENSG00000241472      8.097166e-03 Epstein Barr Virus
## 76 ENSG00000270550      3.849115e-03 Epstein Barr Virus
## 77 ENSG00000211893      3.497333e-03 Epstein Barr Virus
## 78 ENSG00000211896      1.246666e-03 Epstein Barr Virus
## 79 ENSG00000253755      1.265289e-03 Epstein Barr Virus
## 80 ENSG00000211892      8.612801e-04 Epstein Barr Virus

Multiple Sclerosis gene IDs and the fold change relative to the control means (the `FC_comml_vs_control` column is the one used below).

# Column layout of the Multiple Sclerosis table.
names(MS_barcode_genes)
##  [1] "ID_REF"                      "geneSynonyms"               
##  [3] "genecardsSummary"            "NCBI_Summary"               
##  [5] "uniProtSummary"              "Ensembl_Name"               
##  [7] "control1.4362"               "control2.4363"              
##  [9] "control3.4364"               "MS1_r1_4370"                
## [11] "MS1_r2_4371"                 "MS1_r3_4372"                
## [13] "MS1_r4_4373"                 "MS1_r5_4374"                
## [15] "MS2_r1_4375"                 "MS2_r2_4376"                
## [17] "MS2_r3_4377"                 "MS2_r4_4378"                
## [19] "MS2_r5_4379"                 "commercial_r1_4365"         
## [21] "commercial_r2_4366"          "commercial_r3_4367"         
## [23] "commercial_r4_4368"          "commercial_r5_4369"         
## [25] "control_Means"               "MS1_Means"                  
## [27] "MS2_Means"                   "comml_Means"                
## [29] "FC_MS1_vs_control"           "FC_MS2_vs_control"          
## [31] "FC_comml_vs_control"         "FC_inverse_control_vs_comml"
## [33] "cDNA"                        "cDNA_AminoAcids"            
## [35] "mRNA"                        "RNA_AminoAcids"
# Keep the gene symbol, the Ensembl ID and the commercial-vs-control fold
# change. Columns are selected by name rather than position (2, 6, 31) so a
# change in the input layout cannot silently pick the wrong column.
MS_ID_FC <- MS_barcode_genes[, c("geneSynonyms", "Ensembl_Name", "FC_comml_vs_control")]
colnames(MS_ID_FC) <- c("MS_genecards_ID", "MS_Ensembl_ID", "MS_FC_comm_cntrl")
# At least one symbol in the raw table carries a leading space (" PIK3C2A");
# trim it so later matching on gene symbol is not defeated by whitespace.
MS_ID_FC$MS_genecards_ID <- trimws(MS_ID_FC$MS_genecards_ID)
# Tag every row with the pathology it came from.
MS_ID_FC$topGene_pathology <- 'Multiple Sclerosis'
MS_ID_FC
##    MS_genecards_ID   MS_Ensembl_ID MS_FC_comm_cntrl  topGene_pathology
## 1          TSNARE1 ENSG00000171045       0.10485437 Multiple Sclerosis
## 2          CACNA1E ENSG00000198216      56.40000000 Multiple Sclerosis
## 3            STX12 ENSG00000117758      52.80000000 Multiple Sclerosis
## 4        PDE4DIPP5 ENSG00000275064      48.67500000 Multiple Sclerosis
## 5           PLSCR5 ENSG00000231213      60.60000000 Multiple Sclerosis
## 6            CASC8 ENSG00000246228      62.70000000 Multiple Sclerosis
## 7          PIK3C2A ENSG00000011405      50.52000000 Multiple Sclerosis
## 8            TSHZ2 ENSG00000182463      65.60000000 Multiple Sclerosis
## 9          ST8SIA4 ENSG00000113532      48.00000000 Multiple Sclerosis
## 10          IGFBP7 ENSG00000163453      91.00000000 Multiple Sclerosis
## 11          FAM20C ENSG00000177706      71.80000000 Multiple Sclerosis
## 12        NDUFB5P1 ENSG00000251025      54.45000000 Multiple Sclerosis
## 13         SCHLAP1 ENSG00000281131      47.50000000 Multiple Sclerosis
## 14         SLIRPP1 ENSG00000227505      48.96923077 Multiple Sclerosis
## 15           HDAC4 ENSG00000068024      47.64000000 Multiple Sclerosis
## 16         CRLF3P3 ENSG00000228225      52.10000000 Multiple Sclerosis
## 17            LZIC ENSG00000162441       0.12692308 Multiple Sclerosis
## 18          HSD3B1 ENSG00000203857      68.70000000 Multiple Sclerosis
## 19          AQP12B ENSG00000185176      41.60000000 Multiple Sclerosis
## 20          CLINT1 ENSG00000113282     142.40000000 Multiple Sclerosis
## 21       KCNA6-AS1 ENSG00000256988      49.20000000 Multiple Sclerosis
## 22       MIR4432HG ENSG00000228590       0.09850746 Multiple Sclerosis
## 23        TMEM200A ENSG00000164484      54.60000000 Multiple Sclerosis
## 24           ERBB4 ENSG00000178568      56.85000000 Multiple Sclerosis
## 25           ESRP1 ENSG00000104413      62.00000000 Multiple Sclerosis
## 26           WDR35 ENSG00000118965       0.11650485 Multiple Sclerosis
## 27           ACTN4 ENSG00000130402       0.10800000 Multiple Sclerosis
## 28           NLRP3 ENSG00000162711      65.40000000 Multiple Sclerosis
## 29      POM121L15P ENSG00000161103      63.52500000 Multiple Sclerosis
## 30         SMARCA2 ENSG00000080503      70.20000000 Multiple Sclerosis
## 31          CAMKMT ENSG00000143919      42.36000000 Multiple Sclerosis
## 32            CDH8 ENSG00000150394      50.74285714 Multiple Sclerosis
## 33         ST3GAL3 ENSG00000126091      66.00000000 Multiple Sclerosis
## 34            TFPI ENSG00000003436       0.11186441 Multiple Sclerosis
## 35       RNU6-280P ENSG00000201015       0.09000000 Multiple Sclerosis
## 36         ATP5MC3 ENSG00000154518      44.92500000 Multiple Sclerosis
## 37        RPL31P30 ENSG00000230702       0.09397590 Multiple Sclerosis
## 38          KLHL29 ENSG00000119771       0.08219178 Multiple Sclerosis
## 39           DDAH1 ENSG00000153904       0.06835443 Multiple Sclerosis
## 40         RPL7P61 ENSG00000230282      62.00000000 Multiple Sclerosis
## 41            CPA6 ENSG00000165078      58.20000000 Multiple Sclerosis

Lyme disease gene IDs and fold changes: the available columns.

# Column layout of the Lyme disease table.
names(Lyme_genes33)
##   [1] "Gene"                                
##   [2] "healthyControl_1"                    
##   [3] "healthyControl_2"                    
##   [4] "healthyControl_3"                    
##   [5] "healthyControl_4"                    
##   [6] "healthyControl_5"                    
##   [7] "healthyControl_6"                    
##   [8] "healthyControl_7"                    
##   [9] "healthyControl_8"                    
##  [10] "healthyControl_9"                    
##  [11] "healthyControl_10"                   
##  [12] "healthyControl_11"                   
##  [13] "healthyControl_12"                   
##  [14] "healthyControl_13"                   
##  [15] "healthyControl_14"                   
##  [16] "healthyControl_15"                   
##  [17] "healthyControl_16"                   
##  [18] "healthyControl_17"                   
##  [19] "healthyControl_18"                   
##  [20] "healthyControl_19"                   
##  [21] "healthyControl_20"                   
##  [22] "healthyControl_21"                   
##  [23] "acuteLymeDisease_1"                  
##  [24] "acuteLymeDisease_2"                  
##  [25] "acuteLymeDisease_3"                  
##  [26] "acuteLymeDisease_4"                  
##  [27] "acuteLymeDisease_5"                  
##  [28] "acuteLymeDisease_6"                  
##  [29] "acuteLymeDisease_7"                  
##  [30] "acuteLymeDisease_8"                  
##  [31] "acuteLymeDisease_9"                  
##  [32] "acuteLymeDisease_10"                 
##  [33] "acuteLymeDisease_11"                 
##  [34] "acuteLymeDisease_12"                 
##  [35] "acuteLymeDisease_13"                 
##  [36] "acuteLymeDisease_14"                 
##  [37] "acuteLymeDisease_15"                 
##  [38] "acuteLymeDisease_16"                 
##  [39] "acuteLymeDisease_17"                 
##  [40] "acuteLymeDisease_18"                 
##  [41] "acuteLymeDisease_19"                 
##  [42] "acuteLymeDisease_20"                 
##  [43] "acuteLymeDisease_21"                 
##  [44] "acuteLymeDisease_22"                 
##  [45] "acuteLymeDisease_23"                 
##  [46] "acuteLymeDisease_24"                 
##  [47] "acuteLymeDisease_25"                 
##  [48] "acuteLymeDisease_26"                 
##  [49] "acuteLymeDisease_27"                 
##  [50] "acuteLymeDisease_28"                 
##  [51] "Antibodies_1month_1"                 
##  [52] "Antibodies_1month_2"                 
##  [53] "Antibodies_1month_3"                 
##  [54] "Antibodies_1month_4"                 
##  [55] "Antibodies_1month_5"                 
##  [56] "Antibodies_1month_6"                 
##  [57] "Antibodies_1month_7"                 
##  [58] "Antibodies_1month_8"                 
##  [59] "Antibodies_1month_9"                 
##  [60] "Antibodies_1month_10"                
##  [61] "Antibodies_1month_11"                
##  [62] "Antibodies_1month_12"                
##  [63] "Antibodies_1month_13"                
##  [64] "Antibodies_1month_14"                
##  [65] "Antibodies_1month_15"                
##  [66] "Antibodies_1month_16"                
##  [67] "Antibodies_1month_17"                
##  [68] "Antibodies_1month_18"                
##  [69] "Antibodies_1month_19"                
##  [70] "Antibodies_1month_20"                
##  [71] "Antibodies_1month_21"                
##  [72] "Antibodies_1month_22"                
##  [73] "Antibodies_1month_23"                
##  [74] "Antibodies_1month_24"                
##  [75] "Antibodies_1month_25"                
##  [76] "Antibodies_1month_26"                
##  [77] "Antibodies_1month_27"                
##  [78] "Antibodies_6months_1"                
##  [79] "Antibodies_6months_2"                
##  [80] "Antibodies_6months_3"                
##  [81] "Antibodies_6months_4"                
##  [82] "Antibodies_6months_5"                
##  [83] "Antibodies_6months_6"                
##  [84] "Antibodies_6months_7"                
##  [85] "Antibodies_6months_8"                
##  [86] "Antibodies_6months_9"                
##  [87] "Antibodies_6months_10"               
##  [88] "healthy_Mean"                        
##  [89] "acuteLymeDisease_Mean"               
##  [90] "antibodies_1month_Mean"              
##  [91] "antibodies_6month_Mean"              
##  [92] "acuteHealthy_foldChange"             
##  [93] "antibodies_1month_healthy_foldChange"
##  [94] "antibodies_6month_healthy_foldchange"
##  [95] "acute_vs_6month_foldchange"          
##  [96] "Chromosomal.Location"                
##  [97] "GeneCardsGeneSummary"                
##  [98] "NCBI_GeneSummary"                    
##  [99] "UniProtKB_Swiss.Prot_GeneSummary"    
## [100] "overallGeneSummary"

Lyme disease gene ID and the fold change of the 6-month antibody group (chronic infection at 6 months) versus healthy controls.

# Keep the gene symbol and the 6-month-antibodies-vs-healthy fold change.
# Columns are selected by name rather than position (1 and 94); with 100
# columns a positional index is easy to get wrong and hard to audit.
Lyme_ID_FC <- Lyme_genes33[, c("Gene", "antibodies_6month_healthy_foldchange")]
colnames(Lyme_ID_FC) <- c("Lyme_genecards_ID", "Lyme_FC_6months_healthy")
# Tag every row with the pathology it came from.
Lyme_ID_FC$topGene_pathology <- "Lyme Disease 6 months"
Lyme_ID_FC
##    Lyme_genecards_ID Lyme_FC_6months_healthy     topGene_pathology
## 1              CABP1            -14075.76165 Lyme Disease 6 months
## 2              CENPF             -6896.96986 Lyme Disease 6 months
## 3             CLEC2L              9986.32035 Lyme Disease 6 months
## 4              CTXN3           -128816.81220 Lyme Disease 6 months
## 5             CYP7B1              1067.07194 Lyme Disease 6 months
## 6               DLG3               133.04380 Lyme Disease 6 months
## 7               ENO1              1646.31692 Lyme Disease 6 months
## 8              ESYT1              1952.20968 Lyme Disease 6 months
## 9                 F2              2367.18140 Lyme Disease 6 months
## 10           FAM162A             -9664.67906 Lyme Disease 6 months
## 11              FRS3             -1551.41693 Lyme Disease 6 months
## 12              GATC              1879.72930 Lyme Disease 6 months
## 13             HECW1             -1132.92803 Lyme Disease 6 months
## 14              HPGD               -89.52153 Lyme Disease 6 months
## 15            IGFALS               766.94254 Lyme Disease 6 months
## 16             ISG20            154077.25710 Lyme Disease 6 months
## 17            KCNJ16              2297.56146 Lyme Disease 6 months
## 18           KHDRBS3             -2100.04101 Lyme Disease 6 months
## 19         LOC400657              -542.12212 Lyme Disease 6 months
## 20            MAP2K7              2142.65543 Lyme Disease 6 months
## 21            NUDT18              -157.63446 Lyme Disease 6 months
## 22            OR52A4               537.58743 Lyme Disease 6 months
## 23              OTOS             -1109.20155 Lyme Disease 6 months
## 24            PDZRN3             -1758.35227 Lyme Disease 6 months
## 25             PEX26              2759.84395 Lyme Disease 6 months
## 26            POU3F2            -16524.10000 Lyme Disease 6 months
## 27            POU4F2             -1368.77924 Lyme Disease 6 months
## 28             PRR24               463.47392 Lyme Disease 6 months
## 29             PSMF1              7907.48209 Lyme Disease 6 months
## 30             RGPD3                -1.00000 Lyme Disease 6 months
## 31            RNF168              2964.55827 Lyme Disease 6 months
## 32            SLC1A1              -107.63650 Lyme Disease 6 months
## 33          TMEM194A               117.81335 Lyme Disease 6 months

Fibromyalgia gene IDs and fold changes.

# Column layout of the fibromyalgia table (repeated here for reference).
names(FM_genes15)
##  [1] "Gene"                          "Overall"                      
##  [3] "model"                         "GeneCardsName"                
##  [5] "overallGeneSummary"            "foldchangeHealthyVsMyo"       
##  [7] "foldchangeHealthyVsMyoMedians" "Healthy1"                     
##  [9] "Healthy2"                      "Healthy3"                     
## [11] "Healthy4"                      "Healthy5"                     
## [13] "myo1"                          "myo2"                         
## [15] "myo3"                          "myo4"                         
## [17] "myo5"                          "myo6"                         
## [19] "myo7"                          "healthy_Mean"                 
## [21] "myo_Mean"                      "healthyMedian"                
## [23] "myoMedian"                     "geneCardsSummary"             
## [25] "NCBI_summary"                  "UniProt_summary"              
## [27] "overallGeneSummary.1"          "chromosomeLocation"
# Keep the Ensembl ID, the gene symbol and the mean-based fold change.
# Columns are selected by name rather than position (1, 4, 6) so a change in
# the input layout cannot silently pick the wrong column.
# NOTE(review): the source column is named "foldchangeHealthyVsMyo" but is
# renamed "FM_FC_myo_healthy" here - confirm the ratio really is myo / healthy.
FM_ID_FC <- FM_genes15[, c("Gene", "GeneCardsName", "foldchangeHealthyVsMyo")]
colnames(FM_ID_FC) <- c("FM_Ensembl_ID", "FM_genecards_ID", "FM_FC_myo_healthy")
# Tag every row with the pathology it came from.
FM_ID_FC$topGene_pathology <- 'fibromyalgia'
FM_ID_FC
##      FM_Ensembl_ID FM_genecards_ID FM_FC_myo_healthy topGene_pathology
## 1  ENSG00000081041           CXCL2      2.650281e-02      fibromyalgia
## 2  ENSG00000108342            CSF3      2.265523e-02      fibromyalgia
## 3  ENSG00000138135           CH25H      2.248895e-02      fibromyalgia
## 4  ENSG00000166676          TVP23A      5.020766e-02      fibromyalgia
## 5  ENSG00000166676          TVP23A      5.020766e-02      fibromyalgia
## 6  ENSG00000180818          HOXC10      3.268590e-02      fibromyalgia
## 7  ENSG00000180818          HOXC10      3.268590e-02      fibromyalgia
## 8  ENSG00000180818          HOXC10      3.268590e-02      fibromyalgia
## 9  ENSG00000235590        GNAS-AS1      3.344577e-02      fibromyalgia
## 10 ENSG00000235590        GNAS-AS1      3.344577e-02      fibromyalgia
## 11 ENSG00000250978    LOC101928819      6.801948e-02      fibromyalgia
## 12 ENSG00000250978    LOC101928819      6.801948e-02      fibromyalgia
## 13 ENSG00000267368         UPK3BL1      8.896551e+13      fibromyalgia
## 14 ENSG00000268292 ENSG00000268292      3.325812e+00      fibromyalgia
## 15 ENSG00000268292 ENSG00000268292      3.325812e+00      fibromyalgia

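Write out all four data frames. The EBV file needs the GeneCards IDs added and the Lyme file needs the Ensembl IDs added. The Fibromyalgia and Multiple Sclerosis data frames already contain both IDs and are written out unchanged (their file names still carry the `needs_EnsemblID` suffix).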

# Export the four ID / fold-change tables without row names. FALSE is spelled
# out because the `F` alias is an ordinary variable that can be reassigned.
write.csv(EBV_FC, 'EBV_FC_needs_genecardsID.csv', row.names = FALSE)
write.csv(Lyme_ID_FC, 'Lyme_FC_needs_EnsemblID.csv', row.names = FALSE)
write.csv(FM_ID_FC, 'FM_FC_needs_EnsemblID.csv', row.names = FALSE)
write.csv(MS_ID_FC, 'MS_FC_needs_EnsemblID.csv', row.names = FALSE)

After adding in the alternate names, we can load the data frames to work with.

The alternate names were added manually; only one gene in the Lyme disease set had no alternate name, as it was a substrate. Clear the environment and read these data frames into it.

# Re-import the ID / fold-change tables (the Lyme and EBV files are the
# manually annotated versions). TRUE is spelled out because the `T` alias is
# an ordinary variable that can be reassigned.
lyme_IDs_FCs <- read.csv("Lyme_FC_needs_EnsemblID_ensemble_ID_added.csv", sep = ',',
                         header = TRUE, na.strings = c('', ' ', 'na', 'NA'))
EBV_IDs_FCs <- read.csv("EBV_FC_needs_genecardsID_addedGenecardsID.csv", sep = ',',
                        header = TRUE, na.strings = c('', ' ', 'na', 'NA'))
FM_IDs_FCs <- read.csv("FM_FC_needs_EnsemblID.csv", sep = ',',
                       header = TRUE, na.strings = c('', ' ', 'na', 'NA'))
MS_IDs_FCs <- read.csv('MS_FC_needs_EnsemblID.csv', sep = ',',
                       header = TRUE, na.strings = c('', ' ', 'na', 'NA'))
# Sort the MS genes from largest to smallest fold change.
MS <- MS_IDs_FCs[order(MS_IDs_FCs$MS_FC_comm_cntrl, decreasing = TRUE), ]
head(MS)
##    MS_genecards_ID   MS_Ensembl_ID MS_FC_comm_cntrl  topGene_pathology
## 20          CLINT1 ENSG00000113282            142.4 Multiple Sclerosis
## 10          IGFBP7 ENSG00000163453             91.0 Multiple Sclerosis
## 11          FAM20C ENSG00000177706             71.8 Multiple Sclerosis
## 30         SMARCA2 ENSG00000080503             70.2 Multiple Sclerosis
## 18          HSD3B1 ENSG00000203857             68.7 Multiple Sclerosis
## 33         ST3GAL3 ENSG00000126091             66.0 Multiple Sclerosis
# Sort the fibromyalgia genes from largest to smallest fold change
# (TRUE instead of the reassignable `T` alias).
FM <- FM_IDs_FCs[order(FM_IDs_FCs$FM_FC_myo_healthy, decreasing = TRUE), ]
head(FM)
##      FM_Ensembl_ID FM_genecards_ID FM_FC_myo_healthy topGene_pathology
## 13 ENSG00000267368         UPK3BL1      8.896551e+13      fibromyalgia
## 14 ENSG00000268292 ENSG00000268292      3.325812e+00      fibromyalgia
## 15 ENSG00000268292 ENSG00000268292      3.325812e+00      fibromyalgia
## 11 ENSG00000250978    LOC101928819      6.801948e-02      fibromyalgia
## 12 ENSG00000250978    LOC101928819      6.801948e-02      fibromyalgia
## 4  ENSG00000166676          TVP23A      5.020766e-02      fibromyalgia
# Sort the Lyme genes from largest to smallest fold change
# (TRUE instead of the reassignable `T` alias).
Lyme <- lyme_IDs_FCs[order(lyme_IDs_FCs$Lyme_FC_6months_healthy, decreasing = TRUE), ]
head(Lyme)
##    Lyme_ensembl_ID Lyme_genecards_ID Lyme_FC_6months_healthy
## 16 ENSG00000172183             ISG20              154077.257
## 3  ENSG00000236279            CLEC2L                9986.320
## 29 ENSG00000125818             PSMF1                7907.482
## 31 ENSG00000163961            RNF168                2964.558
## 25 ENSG00000215193             PEX26                2759.844
## 9  ENSG00000180210                F2                2367.181
##        topGene_pathology
## 16 Lyme Disease 6 months
## 3  Lyme Disease 6 months
## 29 Lyme Disease 6 months
## 31 Lyme Disease 6 months
## 25 Lyme Disease 6 months
## 9  Lyme Disease 6 months
# Sort the EBV genes from largest to smallest fold change
# (TRUE instead of the reassignable `T` alias).
EBV <- EBV_IDs_FCs[order(EBV_IDs_FCs$FC_IL27Stim_cntrl, decreasing = TRUE), ]
head(EBV)
##     EBV_ensembl_ID EBV_genecards_ID
## 1  ENSG00000211899             IGHM
## 2  ENSG00000164458             TBXT
## 3  ENSG00000211644         IGLV1-51
## 16 ENSG00000125869            LAMP5
## 4  ENSG00000163600             ICOS
## 12 ENSG00000124507          PACSIN1
##                                    EBV_genecards_extendedName FC_IL27Stim_cntrl
## 1                            Immunoglobulin Heavy Constant Mu       18550.40000
## 2                               T-Box Transcription Factor T         1051.20000
## 3                         Immunoglobulin Lambda Variable 1-51         179.38824
## 16      Lysosomal Associated Membrane Protein Family Member 5         140.00000
## 4                               Inducible T Cell Costimulator         105.00000
## 12 Protein Kinase C And Casein Kinase Substrate In Neurons 1           75.04348
##     topGene_pathology
## 1  Epstein Barr Virus
## 2  Epstein Barr Virus
## 3  Epstein Barr Virus
## 16 Epstein Barr Virus
## 4  Epstein Barr Virus
## 12 Epstein Barr Virus

Check whether any genes appear in the top-gene lists of more than one pathology. Let's remove the intermediate data frames first.

# The sorted copies (EBV, FM, Lyme, MS) are all that is needed from here on,
# so drop the unsorted re-imported frames.
rm(EBV_IDs_FCs, FM_IDs_FCs, lyme_IDs_FCs, MS_IDs_FCs)

# Ensembl IDs of the top genes for each pathology.
EBV_list <- EBV$EBV_ensembl_ID
FM_list <- FM$FM_Ensembl_ID
Lyme_list <- Lyme$Lyme_ensembl_ID
MS_list <- MS$MS_Ensembl_ID

# Pairwise membership tests: one logical per element of the left-hand list.
# NOTE: `c` shadows the name of base::c() as a variable; the names a-f are
# kept unchanged in case code outside this view relies on them.
a <- EBV_list %in% FM_list
b <- EBV_list %in% Lyme_list
c <- EBV_list %in% MS_list

d <- FM_list %in% Lyme_list
e <- FM_list %in% MS_list

f <- Lyme_list %in% MS_list

# Summarise each comparison so the "no shared genes" conclusion is actually
# checked rather than assumed: TRUE means at least one Ensembl ID is shared.
overlap_found <- vapply(
  list(EBV_FM = a, EBV_Lyme = b, EBV_MS = c,
       FM_Lyme = d, FM_MS = e, Lyme_MS = f),
  any,
  logical(1)
)
overlap_found

All are FALSE, so none of the genes (by Ensembl ID) are shared among the top genes for each pathology. Let's just row-bind them after making the column names the same on these 4 data sets, with some having to drop a feature field.

# Give the fibromyalgia table the shared four-column naming scheme.
names(FM) <- c(
  "Ensembl_ID",
  "Genecards_ID",
  "FC_pathology_control",
  "topGenePathology"
)

head(FM, n = 6L)
##         Ensembl_ID    Genecards_ID FC_pathology_control topGenePathology
## 13 ENSG00000267368         UPK3BL1         8.896551e+13     fibromyalgia
## 14 ENSG00000268292 ENSG00000268292         3.325812e+00     fibromyalgia
## 15 ENSG00000268292 ENSG00000268292         3.325812e+00     fibromyalgia
## 11 ENSG00000250978    LOC101928819         6.801948e-02     fibromyalgia
## 12 ENSG00000250978    LOC101928819         6.801948e-02     fibromyalgia
## 4  ENSG00000166676          TVP23A         5.020766e-02     fibromyalgia
# The MS table stores the gene symbol before the Ensembl ID: rename first,
# then reorder so the Ensembl ID leads like in the other tables.
names(MS) <- c(
  "Genecards_ID",
  "Ensembl_ID",
  "FC_pathology_control",
  "topGenePathology"
)
MS <- MS[, c("Ensembl_ID", "Genecards_ID", "FC_pathology_control", "topGenePathology")]
head(MS, n = 6L)
##         Ensembl_ID Genecards_ID FC_pathology_control   topGenePathology
## 20 ENSG00000113282       CLINT1                142.4 Multiple Sclerosis
## 10 ENSG00000163453       IGFBP7                 91.0 Multiple Sclerosis
## 11 ENSG00000177706       FAM20C                 71.8 Multiple Sclerosis
## 30 ENSG00000080503      SMARCA2                 70.2 Multiple Sclerosis
## 18 ENSG00000203857       HSD3B1                 68.7 Multiple Sclerosis
## 33 ENSG00000126091      ST3GAL3                 66.0 Multiple Sclerosis
# Apply the common four-column header to the Lyme table and preview it.
names(Lyme) <- c(
  "Ensembl_ID",
  "Genecards_ID",
  "FC_pathology_control",
  "topGenePathology"
)
head(Lyme, n = 6L)
##         Ensembl_ID Genecards_ID FC_pathology_control      topGenePathology
## 16 ENSG00000172183        ISG20           154077.257 Lyme Disease 6 months
## 3  ENSG00000236279       CLEC2L             9986.320 Lyme Disease 6 months
## 29 ENSG00000125818        PSMF1             7907.482 Lyme Disease 6 months
## 31 ENSG00000163961       RNF168             2964.558 Lyme Disease 6 months
## 25 ENSG00000215193        PEX26             2759.844 Lyme Disease 6 months
## 9  ENSG00000180210           F2             2367.181 Lyme Disease 6 months
# The EBV table carries one extra field (the extended gene name). Label all
# five columns, then keep only the four shared with the other pathologies.
names(EBV) <- c(
  "Ensembl_ID",
  "Genecards_ID",
  "extendedName",
  "FC_pathology_control",
  "topGenePathology"
)
EBV <- EBV[, c(
  "Ensembl_ID",
  "Genecards_ID",
  "FC_pathology_control",
  "topGenePathology"
)]
head(EBV, n = 6L)
##         Ensembl_ID Genecards_ID FC_pathology_control   topGenePathology
## 1  ENSG00000211899         IGHM          18550.40000 Epstein Barr Virus
## 2  ENSG00000164458         TBXT           1051.20000 Epstein Barr Virus
## 3  ENSG00000211644     IGLV1-51            179.38824 Epstein Barr Virus
## 16 ENSG00000125869        LAMP5            140.00000 Epstein Barr Virus
## 4  ENSG00000163600         ICOS            105.00000 Epstein Barr Virus
## 12 ENSG00000124507      PACSIN1             75.04348 Epstein Barr Virus

They all have the same number of columns and names so they can be row bound to each other.

# Stack the four per-pathology tables (EBV, FM, Lyme, MS, in that order) into
# one combined table and preview the first rows.
pathologies4 <- do.call(rbind, list(EBV, FM, Lyme, MS))
head(pathologies4, n = 6L)
##         Ensembl_ID Genecards_ID FC_pathology_control   topGenePathology
## 1  ENSG00000211899         IGHM          18550.40000 Epstein Barr Virus
## 2  ENSG00000164458         TBXT           1051.20000 Epstein Barr Virus
## 3  ENSG00000211644     IGLV1-51            179.38824 Epstein Barr Virus
## 16 ENSG00000125869        LAMP5            140.00000 Epstein Barr Virus
## 4  ENSG00000163600         ICOS            105.00000 Epstein Barr Virus
## 12 ENSG00000124507      PACSIN1             75.04348 Epstein Barr Virus
# Preview the last rows of the combined table.
tail(pathologies4, n = 6L)
##          Ensembl_ID Genecards_ID FC_pathology_control   topGenePathology
## 115 ENSG00000171045      TSNARE1           0.10485437 Multiple Sclerosis
## 222 ENSG00000228590    MIR4432HG           0.09850746 Multiple Sclerosis
## 371 ENSG00000230702     RPL31P30           0.09397590 Multiple Sclerosis
## 351 ENSG00000201015    RNU6-280P           0.09000000 Multiple Sclerosis
## 381 ENSG00000119771       KLHL29           0.08219178 Multiple Sclerosis
## 391 ENSG00000153904        DDAH1           0.06835443 Multiple Sclerosis

Looks good. We have the data for MS, EBV, Lyme, and FM. We can write this out to csv.

# Save the combined four-pathology table without the row-name column.
# Use FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv(pathologies4, 'pathologies4_MS_FM_EBV_Lyme.csv', row.names = FALSE)

Knowing the media and the way the fold change values of each gene were obtained is useful as well. We can add that later by adding a class feature for media type: use a which command to find the rows of each pathology, then set (for example with gsub or direct assignment) the media type for those rows.

Fibromyalgia or the FM data obtained media from skeletal muscle using RNA-seq or high throughput sequencing. They injected the pain inducing myocyte enhancer factor 2 (MEF2) into healthy and the FM patients, then injected DEX into those patients and found decreased inflammatory factors for pain in healthy and FM patients. The time point at which samples were obtained, before or after which injection, is not apparent or clear. There were 5 healthy and 7 myofascial trigger point pain or fibromyalgia pain samples for a total of 12 samples. This project chose to use the fragments per kilobase of transcript per million mapped reads (FPKM) instead of counts for each gene in the sample. It was listed under human or Homo sapiens, but the description for the samples each said derived from rats. Open questions on this study are when the gene samples were collected and whether they are human: were they collected after the MEF2 injection to stimulate pain and see WBC activity, or after the DEX injection delivered after the MEF2 injection?

In the multiple sclerosis high throughput sequencing, the B cells were obtained, but it is not certain whether they were isolated from peripheral blood mononuclear cells (probably) or taken from bone marrow. There were 3 repeats of healthy controls, 5 repeats of the first MS patient, 5 repeats of the 2nd MS patient, and 5 repeats of the commercial line MS patient purchased by researchers to compare. This totals 15 MS samples and 3 healthy samples for 18 total samples. These samples were 20 base pair long nucleotide strands of complementary DNA or cDNA.

The EBV or Epstein-Barr Viral infected samples were obtained from lymphoblastic cells in peripheral blood mononuclear cells. Not the same as B cells in multiple sclerosis for tissue type. The lymphoblastic B cells are cancerous uncontrolled growth B cells, where the regular B cells are the normal circulating white blood cells active in healthy immunity. There are 2 samples of control1, 2 samples of control2, 2 samples from patient 1, and 2 samples from patient 2. This is a total of 8 samples where each control and sample have a basal or non-stimulated baseline and 4 samples stimulated with IL27. They stimulated the samples with IL27 and found the samples with an allele lacking IL27RA had a slower healing time if any to EBV infection. These 8 samples used the raw counts of genes.

Lyme disease samples had more acute, healthy, and 1 month infection than the chronic infection samples. The blood of peripheral blood mononuclear cells was examined by array and not high throughput analysis. There were 10 chronic cases of 6 months infection after antibiotics, 21 cases of healthy and uninfected, 28 cases of acute infection, and 27 cases of infected 1 month with antibiotics.

So, we have 8 EBV samples as lymphoblastic B-cells of PBMCs stimulated with IL27, 86 Lyme disease samples as RBCs of PBMCs in various states of infection with antibiotics used, 12 Fibromyalgia samples as skeletal muscle of possibly rats and unknown if before or after being stimulated with MEF2 known to enhance pain causing inflammation, or before or after that stimulation as well as stimulation with DEX known to alleviate pain caused by inflammation, and 18 Multiple Sclerosis samples as healthy B-cells of PBMCs without any stimulation but using 5 MS samples of commercial B-cells.

There is a study on mononucleosis of EBV infection that hasn't been analyzed or used to make predictions on class of sample given the information the study provided. That analysis, and any on Hodgkin's lymphoma, Burkitt lymphoma, and throat carcinoma related to EBV, will be analyzed if found and added to this data to make a machine and see how reliable the top genes from those studies are in predicting a class of pathology from all classes used.

High throughput sequencing was the RNA-Seq done on all samples except for the Lyme disease study that used micro arrays to get gene expression data in fragments per kilo million.

So we have:

# Row positions of each pathology within the combined table, matched on the
# exact label stored in topGenePathology.
LymeIndices <- which(pathologies4[["topGenePathology"]] == "Lyme Disease 6 months")
EBV_Indices <- which(pathologies4[["topGenePathology"]] == "Epstein Barr Virus")
MS_Indices <- which(pathologies4[["topGenePathology"]] == "Multiple Sclerosis")
FM_Indices <- which(pathologies4[["topGenePathology"]] == "fibromyalgia")

# Create the media-type column with a placeholder value in every row, then
# preview the table.
pathologies4[["mediaType"]] <- "media"
head(pathologies4, n = 6L)
##         Ensembl_ID Genecards_ID FC_pathology_control   topGenePathology
## 1  ENSG00000211899         IGHM          18550.40000 Epstein Barr Virus
## 2  ENSG00000164458         TBXT           1051.20000 Epstein Barr Virus
## 3  ENSG00000211644     IGLV1-51            179.38824 Epstein Barr Virus
## 16 ENSG00000125869        LAMP5            140.00000 Epstein Barr Virus
## 4  ENSG00000163600         ICOS            105.00000 Epstein Barr Virus
## 12 ENSG00000124507      PACSIN1             75.04348 Epstein Barr Virus
##    mediaType
## 1      media
## 2      media
## 3      media
## 16     media
## 4      media
## 12     media
# Overwrite the placeholder with the tissue / assay label of each study,
# one pathology at a time, using the row positions computed earlier.
# NOTE(review): "RBCs of PBMCs" looks inconsistent as a cell-type label
# (PBMCs are mononuclear cells) - confirm the intended Lyme sample type.
pathologies4$mediaType <- replace(
  pathologies4$mediaType, LymeIndices, "RBCs of PBMCs array format"
)
pathologies4$mediaType <- replace(
  pathologies4$mediaType, EBV_Indices, "LCLs of PBMCs RNA-Seq format"
)
pathologies4$mediaType <- replace(
  pathologies4$mediaType, MS_Indices, "B-cell of PBMCs RNA-Seq format"
)
pathologies4$mediaType <- replace(
  pathologies4$mediaType, FM_Indices, "skeletal muscle RNA-Seq format"
)

# Print the whole column to confirm every row received a label.
pathologies4[["mediaType"]]
##   [1] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##   [3] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##   [5] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##   [7] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##   [9] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [11] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [13] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [15] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [17] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [19] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [21] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [23] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [25] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [27] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [29] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [31] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [33] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [35] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [37] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [39] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [41] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [43] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [45] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [47] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [49] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [51] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [53] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [55] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [57] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [59] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [61] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [63] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [65] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [67] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [69] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [71] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [73] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [75] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [77] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [79] "LCLs of PBMCs RNA-Seq format"   "LCLs of PBMCs RNA-Seq format"  
##  [81] "skeletal muscle RNA-Seq format" "skeletal muscle RNA-Seq format"
##  [83] "skeletal muscle RNA-Seq format" "skeletal muscle RNA-Seq format"
##  [85] "skeletal muscle RNA-Seq format" "skeletal muscle RNA-Seq format"
##  [87] "skeletal muscle RNA-Seq format" "skeletal muscle RNA-Seq format"
##  [89] "skeletal muscle RNA-Seq format" "skeletal muscle RNA-Seq format"
##  [91] "skeletal muscle RNA-Seq format" "skeletal muscle RNA-Seq format"
##  [93] "skeletal muscle RNA-Seq format" "skeletal muscle RNA-Seq format"
##  [95] "skeletal muscle RNA-Seq format" "RBCs of PBMCs array format"    
##  [97] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
##  [99] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [101] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [103] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [105] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [107] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [109] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [111] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [113] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [115] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [117] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [119] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [121] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [123] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [125] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [127] "RBCs of PBMCs array format"     "RBCs of PBMCs array format"    
## [129] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [131] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [133] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [135] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [137] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [139] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [141] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [143] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [145] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [147] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [149] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [151] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [153] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [155] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [157] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [159] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [161] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [163] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [165] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [167] "B-cell of PBMCs RNA-Seq format" "B-cell of PBMCs RNA-Seq format"
## [169] "B-cell of PBMCs RNA-Seq format"

Show a couple samples of each pathology type and the gene ID and last few features.

# Two hand-picked rows from each pathology, all columns.
pathologies4[c(1:2, 81:82, 102:103, 153:154), ]
##          Ensembl_ID    Genecards_ID FC_pathology_control      topGenePathology
## 1   ENSG00000211899            IGHM         1.855040e+04    Epstein Barr Virus
## 2   ENSG00000164458            TBXT         1.051200e+03    Epstein Barr Virus
## 131 ENSG00000267368         UPK3BL1         8.896551e+13          fibromyalgia
## 141 ENSG00000268292 ENSG00000268292         3.325812e+00          fibromyalgia
## 171 ENSG00000153822          KCNJ16         2.297561e+03 Lyme Disease 6 months
## 201 ENSG00000076984          MAP2K7         2.142655e+03 Lyme Disease 6 months
## 413 ENSG00000275064       PDE4DIPP5         4.867500e+01    Multiple Sclerosis
## 93  ENSG00000113532         ST8SIA4         4.800000e+01    Multiple Sclerosis
##                          mediaType
## 1     LCLs of PBMCs RNA-Seq format
## 2     LCLs of PBMCs RNA-Seq format
## 131 skeletal muscle RNA-Seq format
## 141 skeletal muscle RNA-Seq format
## 171     RBCs of PBMCs array format
## 201     RBCs of PBMCs array format
## 413 B-cell of PBMCs RNA-Seq format
## 93  B-cell of PBMCs RNA-Seq format

Looks like random selections of a few samples in each pathology show the media type.

Let's write this data out to csv after we also summarize the samples in the study by how they were derived or processed to get gene expression analysis.

# Create the study-summary column with a placeholder value in every row,
# then preview the table.
pathologies4[["studySummarized"]] <- "summary of study"
head(pathologies4, n = 6L)
##         Ensembl_ID Genecards_ID FC_pathology_control   topGenePathology
## 1  ENSG00000211899         IGHM          18550.40000 Epstein Barr Virus
## 2  ENSG00000164458         TBXT           1051.20000 Epstein Barr Virus
## 3  ENSG00000211644     IGLV1-51            179.38824 Epstein Barr Virus
## 16 ENSG00000125869        LAMP5            140.00000 Epstein Barr Virus
## 4  ENSG00000163600         ICOS            105.00000 Epstein Barr Virus
## 12 ENSG00000124507      PACSIN1             75.04348 Epstein Barr Virus
##                       mediaType  studySummarized
## 1  LCLs of PBMCs RNA-Seq format summary of study
## 2  LCLs of PBMCs RNA-Seq format summary of study
## 3  LCLs of PBMCs RNA-Seq format summary of study
## 16 LCLs of PBMCs RNA-Seq format summary of study
## 4  LCLs of PBMCs RNA-Seq format summary of study
## 12 LCLs of PBMCs RNA-Seq format summary of study

We add in with copy and paste the summaries we made above on each study.

# Replace the placeholder with the free-text description of each study, one
# pathology at a time. The texts are stored verbatim, including spacing.
pathologies4[["studySummarized"]][LymeIndices] <- "Lyme disease samples had more acute, healthy, and 1 month infection than the chronic infection samples. The blood of peripheral blood mononuclear cells was examined by array and not high throughput analysis.There were 10 chronic cases of 6 months infection after antibiotics, 21 cases of healthy and uninfected, 28 cases of acute infection, and 27 cases of infected 1 month with antibiotics. "
# NOTE(review): unlike the other three summaries, the EBV text also ends with
# the cross-study recap ("So, we have 8 EBV samples ...") - confirm whether
# that recap is meant to be stored on the EBV rows.
pathologies4[["studySummarized"]][EBV_Indices] <- "The EBV or Epstein-Barr Viral infected samples were obtained from lymphoblastic cells in peripheral blood mononuclear cells. Not the same as B cells in multiple sclerosis for tissue type. The lymphoblastic B cells are cancerous uncontrolled growth B cells, where the regular B cells are the normal circulating white blood cells active in healthy immunity. There are 2 samples of control1, 2 samples of control2, 2 samples from patient 1, and 2 samples from patient 2. This is a total of 8 samples where each control and sample have a basal or non-stimulated baseline and 4 samples stimulated with IL27. They stimulated the samples with IL27 and found the samples with an allele lacking IL27RA had a slower healing time if any to EBV infection. These 8 samples used the raw counts of genes. So, we have 8 EBV samples as lymphoblastic B-cells of PBMCs stimulated with IL27, 86 Lyme disease samples as RBCs of PBMCs in various states of infection with antibiotics used, 12 Fibromyalgia samples as skeletal muscle of possibly rats and unknown if before or after being stimulated with MEF2 known to enhance pain causing inflammation, or before or after that stimulation as well as stimulation with DEX known to alleviate pain caused by inflammation, and 18 Multiple Sclerosis samples  as healthy B-cells of PBMCs without any stimulation but using 5 MS samples of commercial B-cells. "
pathologies4[["studySummarized"]][MS_Indices] <- "In the multiple sclerosis high throughput sequencing, the B cells are obtained but not sure if from peripheral blood mononuclear cells and isolated, probably. Or if from bone marrow. There were 3 repeats of healthy controls, 5 repeats of the first MS patient, 5 repeats of the 2nd MS patient, and 5 repeats of the commercial line MS patient purchased by researchers to compare. This totals 15 MS samples and 3 healthy samples for 18 total samples. These samples were 20 base pair long nucleotide strands of complimentary DNA or cDNA."
pathologies4[["studySummarized"]][FM_Indices] <- "Fibromyalgia or the FM data obtained media from skeletal muscle using RNA-seq or high throughput sequencing. They injected the pain enducing myocyte enhancing factor 2 into healthy and the FM patients then injected DEX into those patients and found decreased inflammatory factors for pain in healthy and FM patients. Time point of when samples obtained before or after which injection is not apparent or clear.There were 5 healthy and 7 myofascial trigger point pain or fibromyalgia pain samples for a total of 12 samples. This project chose to use the fragments per kilo-million or fpkm instead of counts for each gene in the sample. It was listed under human or homo sapien, but the description for the samples each said derived from rats. Questions on this study are when the gene samples provided and are they human. Were they provided after the MEF2 injection to stimulate pain and see WBC activity or after the DEX injection delivered after the MEF2 injection."

Select a few samples of each and the 1st and last few columns.

# Two hand-picked rows from each pathology, showing only the gene ID and the
# two annotation columns added above (columns 1, 5 and 6 of the table).
pathologies4[
  c(1:2, 81:82, 102:103, 153:154),
  c("Ensembl_ID", "mediaType", "studySummarized")
]
##          Ensembl_ID                      mediaType
## 1   ENSG00000211899   LCLs of PBMCs RNA-Seq format
## 2   ENSG00000164458   LCLs of PBMCs RNA-Seq format
## 131 ENSG00000267368 skeletal muscle RNA-Seq format
## 141 ENSG00000268292 skeletal muscle RNA-Seq format
## 171 ENSG00000153822     RBCs of PBMCs array format
## 201 ENSG00000076984     RBCs of PBMCs array format
## 413 ENSG00000275064 B-cell of PBMCs RNA-Seq format
## 93  ENSG00000113532 B-cell of PBMCs RNA-Seq format
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        studySummarized
## 1   The EBV or Epstein-Barr Viral infected samples were obtained from lymphoblastic cells in peripheral blood mononuclear cells. Not the same as B cells in multiple sclerosis for tissue type. The lymphoblastic B cells are cancerous uncontrolled growth B cells, where the regular B cells are the normal circulating white blood cells active in healthy immunity. There are 2 samples of control1, 2 samples of control2, 2 samples from patient 1, and 2 samples from patient 2. This is a total of 8 samples where each control and sample have a basal or non-stimulated baseline and 4 samples stimulated with IL27. They stimulated the samples with IL27 and found the samples with an allele lacking IL27RA had a slower healing time if any to EBV infection. These 8 samples used the raw counts of genes. So, we have 8 EBV samples as lymphoblastic B-cells of PBMCs stimulated with IL27, 86 Lyme disease samples as RBCs of PBMCs in various states of infection with antibiotics used, 12 Fibromyalgia samples as skeletal muscle of possibly rats and unknown if before or after being stimulated with MEF2 known to enhance pain causing inflammation, or before or after that stimulation as well as stimulation with DEX known to alleviate pain caused by inflammation, and 18 Multiple Sclerosis samples  as healthy B-cells of PBMCs without any stimulation but using 5 MS samples of commercial B-cells. 
## 2   The EBV or Epstein-Barr Viral infected samples were obtained from lymphoblastic cells in peripheral blood mononuclear cells. Not the same as B cells in multiple sclerosis for tissue type. The lymphoblastic B cells are cancerous uncontrolled growth B cells, where the regular B cells are the normal circulating white blood cells active in healthy immunity. There are 2 samples of control1, 2 samples of control2, 2 samples from patient 1, and 2 samples from patient 2. This is a total of 8 samples where each control and sample have a basal or non-stimulated baseline and 4 samples stimulated with IL27. They stimulated the samples with IL27 and found the samples with an allele lacking IL27RA had a slower healing time if any to EBV infection. These 8 samples used the raw counts of genes. So, we have 8 EBV samples as lymphoblastic B-cells of PBMCs stimulated with IL27, 86 Lyme disease samples as RBCs of PBMCs in various states of infection with antibiotics used, 12 Fibromyalgia samples as skeletal muscle of possibly rats and unknown if before or after being stimulated with MEF2 known to enhance pain causing inflammation, or before or after that stimulation as well as stimulation with DEX known to alleviate pain caused by inflammation, and 18 Multiple Sclerosis samples  as healthy B-cells of PBMCs without any stimulation but using 5 MS samples of commercial B-cells. 
## 131                                                                                                                                                                                                                                                                                                                                                                                                                             Fibromyalgia or the FM data obtained media from skeletal muscle using RNA-seq or high throughput sequencing. They injected the pain enducing myocyte enhancing factor 2 into healthy and the FM patients then injected DEX into those patients and found decreased inflammatory factors for pain in healthy and FM patients. Time point of when samples obtained before or after which injection is not apparent or clear.There were 5 healthy and 7 myofascial trigger point pain or fibromyalgia pain samples for a total of 12 samples. This project chose to use the fragments per kilo-million or fpkm instead of counts for each gene in the sample. It was listed under human or homo sapien, but the description for the samples each said derived from rats. Questions on this study are when the gene samples provided and are they human. Were they provided after the MEF2 injection to stimulate pain and see WBC activity or after the DEX injection delivered after the MEF2 injection.
## 141                                                                                                                                                                                                                                                                                                                                                                                                                             Fibromyalgia or the FM data obtained media from skeletal muscle using RNA-seq or high throughput sequencing. They injected the pain enducing myocyte enhancing factor 2 into healthy and the FM patients then injected DEX into those patients and found decreased inflammatory factors for pain in healthy and FM patients. Time point of when samples obtained before or after which injection is not apparent or clear.There were 5 healthy and 7 myofascial trigger point pain or fibromyalgia pain samples for a total of 12 samples. This project chose to use the fragments per kilo-million or fpkm instead of counts for each gene in the sample. It was listed under human or homo sapien, but the description for the samples each said derived from rats. Questions on this study are when the gene samples provided and are they human. Were they provided after the MEF2 injection to stimulate pain and see WBC activity or after the DEX injection delivered after the MEF2 injection.
## 171                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Lyme disease samples had more acute, healthy, and 1 month infection than the chronic infection samples. The blood of peripheral blood mononuclear cells was examined by array and not high throughput analysis.There were 10 chronic cases of 6 months infection after antibiotics, 21 cases of healthy and uninfected, 28 cases of acute infection, and 27 cases of infected 1 month with antibiotics. 
## 201                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Lyme disease samples had more acute, healthy, and 1 month infection than the chronic infection samples. The blood of peripheral blood mononuclear cells was examined by array and not high throughput analysis.There were 10 chronic cases of 6 months infection after antibiotics, 21 cases of healthy and uninfected, 28 cases of acute infection, and 27 cases of infected 1 month with antibiotics. 
## 413                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              In the multiple sclerosis high throughput sequencing, the B cells are obtained but not sure if from peripheral blood mononuclear cells and isolated, probably. Or if from bone marrow. There were 3 repeats of healthy controls, 5 repeats of the first MS patient, 5 repeats of the 2nd MS patient, and 5 repeats of the commercial line MS patient purchased by researchers to compare. This totals 15 MS samples and 3 healthy samples for 18 total samples. These samples were 20 base pair long nucleotide strands of complimentary DNA or cDNA.
## 93                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               In the multiple sclerosis high throughput sequencing, the B cells are obtained but not sure if from peripheral blood mononuclear cells and isolated, probably. Or if from bone marrow. There were 3 repeats of healthy controls, 5 repeats of the first MS patient, 5 repeats of the 2nd MS patient, and 5 repeats of the commercial line MS patient purchased by researchers to compare. This totals 15 MS samples and 3 healthy samples for 18 total samples. These samples were 20 base pair long nucleotide strands of complimentary DNA or cDNA.

This looks good: each row now carries a brief summary of the study design, the media collected, and the type of analysis.

Let's also add the GSE study ID of each source study, so every row can be cross-referenced with that study's published gene expression data.

# Add a GSE_study_ID column. Every row starts with a placeholder string that
# the per-pathology assignments further down overwrite.
pathologies4$GSE_study_ID <- "GSE study ID"

# Media type for each pathology's rows. These four assignments had been
# collapsed onto one line with the `$` operators and straight quotes mangled
# by the document renderer; they are restored here as valid R statements.
pathologies4$mediaType[LymeIndices] <- "RBCs of PBMCs array format"
pathologies4$mediaType[EBV_Indices] <- "LCLs of PBMCs RNA-Seq format"
pathologies4$mediaType[MS_Indices] <- "B-cell of PBMCs RNA-Seq format"
pathologies4$mediaType[FM_Indices] <- "skeletal muscle RNA-Seq format"

# GSE study ID for each pathology's rows.
pathologies4$GSE_study_ID[LymeIndices] <- "GSE145974"
pathologies4$GSE_study_ID[EBV_Indices] <- "GSE253756"
pathologies4$GSE_study_ID[MS_Indices] <- "GSE293036"
pathologies4$GSE_study_ID[FM_Indices] <- "GSE215154"

Select the gene ID and the last few features (media type, study summary, and GSE study ID) for two rows from each pathology.

# Preview four pairs of rows, showing only the Ensembl ID (column 1) and the
# three annotation columns (5 to 7).
pathologies4[c(1:2, 81:82, 102:103, 153:154), c(1, 5:7)]
##          Ensembl_ID                      mediaType
## 1   ENSG00000211899   LCLs of PBMCs RNA-Seq format
## 2   ENSG00000164458   LCLs of PBMCs RNA-Seq format
## 131 ENSG00000267368 skeletal muscle RNA-Seq format
## 141 ENSG00000268292 skeletal muscle RNA-Seq format
## 171 ENSG00000153822     RBCs of PBMCs array format
## 201 ENSG00000076984     RBCs of PBMCs array format
## 413 ENSG00000275064 B-cell of PBMCs RNA-Seq format
## 93  ENSG00000113532 B-cell of PBMCs RNA-Seq format
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        studySummarized
## 1   The EBV or Epstein-Barr Viral infected samples were obtained from lymphoblastic cells in peripheral blood mononuclear cells. Not the same as B cells in multiple sclerosis for tissue type. The lymphoblastic B cells are cancerous uncontrolled growth B cells, where the regular B cells are the normal circulating white blood cells active in healthy immunity. There are 2 samples of control1, 2 samples of control2, 2 samples from patient 1, and 2 samples from patient 2. This is a total of 8 samples where each control and sample have a basal or non-stimulated baseline and 4 samples stimulated with IL27. They stimulated the samples with IL27 and found the samples with an allele lacking IL27RA had a slower healing time if any to EBV infection. These 8 samples used the raw counts of genes. So, we have 8 EBV samples as lymphoblastic B-cells of PBMCs stimulated with IL27, 86 Lyme disease samples as RBCs of PBMCs in various states of infection with antibiotics used, 12 Fibromyalgia samples as skeletal muscle of possibly rats and unknown if before or after being stimulated with MEF2 known to enhance pain causing inflammation, or before or after that stimulation as well as stimulation with DEX known to alleviate pain caused by inflammation, and 18 Multiple Sclerosis samples  as healthy B-cells of PBMCs without any stimulation but using 5 MS samples of commercial B-cells. 
## 2   The EBV or Epstein-Barr Viral infected samples were obtained from lymphoblastic cells in peripheral blood mononuclear cells. Not the same as B cells in multiple sclerosis for tissue type. The lymphoblastic B cells are cancerous uncontrolled growth B cells, where the regular B cells are the normal circulating white blood cells active in healthy immunity. There are 2 samples of control1, 2 samples of control2, 2 samples from patient 1, and 2 samples from patient 2. This is a total of 8 samples where each control and sample have a basal or non-stimulated baseline and 4 samples stimulated with IL27. They stimulated the samples with IL27 and found the samples with an allele lacking IL27RA had a slower healing time if any to EBV infection. These 8 samples used the raw counts of genes. So, we have 8 EBV samples as lymphoblastic B-cells of PBMCs stimulated with IL27, 86 Lyme disease samples as RBCs of PBMCs in various states of infection with antibiotics used, 12 Fibromyalgia samples as skeletal muscle of possibly rats and unknown if before or after being stimulated with MEF2 known to enhance pain causing inflammation, or before or after that stimulation as well as stimulation with DEX known to alleviate pain caused by inflammation, and 18 Multiple Sclerosis samples  as healthy B-cells of PBMCs without any stimulation but using 5 MS samples of commercial B-cells. 
## 131                                                                                                                                                                                                                                                                                                                                                                                                                             Fibromyalgia or the FM data obtained media from skeletal muscle using RNA-seq or high throughput sequencing. They injected the pain enducing myocyte enhancing factor 2 into healthy and the FM patients then injected DEX into those patients and found decreased inflammatory factors for pain in healthy and FM patients. Time point of when samples obtained before or after which injection is not apparent or clear.There were 5 healthy and 7 myofascial trigger point pain or fibromyalgia pain samples for a total of 12 samples. This project chose to use the fragments per kilo-million or fpkm instead of counts for each gene in the sample. It was listed under human or homo sapien, but the description for the samples each said derived from rats. Questions on this study are when the gene samples provided and are they human. Were they provided after the MEF2 injection to stimulate pain and see WBC activity or after the DEX injection delivered after the MEF2 injection.
## 141                                                                                                                                                                                                                                                                                                                                                                                                                             Fibromyalgia or the FM data obtained media from skeletal muscle using RNA-seq or high throughput sequencing. They injected the pain enducing myocyte enhancing factor 2 into healthy and the FM patients then injected DEX into those patients and found decreased inflammatory factors for pain in healthy and FM patients. Time point of when samples obtained before or after which injection is not apparent or clear.There were 5 healthy and 7 myofascial trigger point pain or fibromyalgia pain samples for a total of 12 samples. This project chose to use the fragments per kilo-million or fpkm instead of counts for each gene in the sample. It was listed under human or homo sapien, but the description for the samples each said derived from rats. Questions on this study are when the gene samples provided and are they human. Were they provided after the MEF2 injection to stimulate pain and see WBC activity or after the DEX injection delivered after the MEF2 injection.
## 171                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Lyme disease samples had more acute, healthy, and 1 month infection than the chronic infection samples. The blood of peripheral blood mononuclear cells was examined by array and not high throughput analysis.There were 10 chronic cases of 6 months infection after antibiotics, 21 cases of healthy and uninfected, 28 cases of acute infection, and 27 cases of infected 1 month with antibiotics. 
## 201                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           Lyme disease samples had more acute, healthy, and 1 month infection than the chronic infection samples. The blood of peripheral blood mononuclear cells was examined by array and not high throughput analysis.There were 10 chronic cases of 6 months infection after antibiotics, 21 cases of healthy and uninfected, 28 cases of acute infection, and 27 cases of infected 1 month with antibiotics. 
## 413                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              In the multiple sclerosis high throughput sequencing, the B cells are obtained but not sure if from peripheral blood mononuclear cells and isolated, probably. Or if from bone marrow. There were 3 repeats of healthy controls, 5 repeats of the first MS patient, 5 repeats of the 2nd MS patient, and 5 repeats of the commercial line MS patient purchased by researchers to compare. This totals 15 MS samples and 3 healthy samples for 18 total samples. These samples were 20 base pair long nucleotide strands of complimentary DNA or cDNA.
## 93                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               In the multiple sclerosis high throughput sequencing, the B cells are obtained but not sure if from peripheral blood mononuclear cells and isolated, probably. Or if from bone marrow. There were 3 repeats of healthy controls, 5 repeats of the first MS patient, 5 repeats of the 2nd MS patient, and 5 repeats of the commercial line MS patient purchased by researchers to compare. This totals 15 MS samples and 3 healthy samples for 18 total samples. These samples were 20 base pair long nucleotide strands of complimentary DNA or cDNA.
##     GSE_study_ID
## 1      GSE253756
## 2      GSE253756
## 131    GSE215154
## 141    GSE215154
## 171    GSE145974
## 201    GSE145974
## 413    GSE293036
## 93     GSE293036

Now write this data frame out to a CSV file to use later. Information can be added or removed as more useful details are found, or as more EBV-related pathologies are analyzed to add to our information on EBV and Lyme disease. We are still looking for answers to the open questions about the fibromyalgia data, or for better data to analyze, and for more EBV-associated pathologies such as mononucleosis, Hodgkin and Burkitt lymphomas, and throat carcinoma.

# Save the annotated data frame for later use. Row names are omitted so the
# CSV contains only the data columns. FALSE is spelled out because the
# shorthand F is an ordinary variable that can be reassigned.
write.csv(
  pathologies4,
  "pathologies4_MS_FM_EBV_Lyme_addedInformation.csv",
  row.names = FALSE
)

Thanks for reading, and keep checking in to see how this turns out. You can get the final data frame, as built up to this point, here.