1. Install packages and load libraries

#install.packages("tidyverse")
#install.packages("readxl")
#install.packages("ggplot2")
#install.packages("knitr")
library(tidyverse)
library(readxl)
library(ggplot2)
library(knitr)

2. Read in our data

#read in Meghan's excel sheet
#I selected a range that omits the first column A because it seemed irrelevant

df <- read_excel("24hr_Timepoint_meghan.xlsx", range="B1:F6")

Manipulate the data

The unlist() function doesn’t work as we need it to. This is because the sequence of genes is not a true list type, but a string representation of a list. So we use the strsplit() function to extract each gene name. We must use a regular expression to identify the characters that are not part of our gene names.

# Convert the string to a list of genes
gene_list <- strsplit(gsub("[{}']", "", df$HI7), ", ")[[1]]

# Now 'gene_list' contains individual gene names as separate elements
print(gene_list)
##  [1] "ANLN"      "MAF"       "LTF"       "TOP2A"     "CDCA2"     "HIST1H2BO"
##  [7] "FIGN"      "KIFC1"     "BUB1"      "APOL4"     "HIST1H1A"  "HIST1H2BF"
## [13] "DDIT4L"    "BIRC5"

So now I know how to extract each list of genes. I will write an apply() function that extracts each list of genes from each index, then assign back to the same data frame.

#unlist and extract each list of genes then reassign them to the same data frame
df[, 3:5] <- apply(df[, 3:5], 2, function(x) strsplit(gsub("[{}']", "", x), ", "))

head(df)

Now we can pivot the data frame so it is “long” instead of “wide”. We condense the last three columns into one column titled Column_name. The, the corresponding list of genes is moved to a column called Gene_list. With this, we can then add a column that gives us a count of the number of genes in each gene list.

#Use pivot_longer() function to mutate data from wide to long
df_long <- df %>%
  pivot_longer(cols = c(HI7, INTERSECTION, IFNw),
               names_to = "Column_name",
               values_to = "Gene_list")

#use mutate() function to add a new column that gives us the length of each list of genes. 
df_long <- df_long %>%
  mutate(length = lengths(Gene_list))

kable(df_long, caption="Final data frame manipulated to show lists of genes and respective lengths.")
Final data frame manipulated to show lists of genes and respective lengths.
CONC REGULATION Column_name Gene_list length
0.1 UP HI7 ANLN , MAF , LTF , TOP2A , CDCA2 , HIST1H2BO, FIGN , KIFC1 , BUB1 , APOL4 , HIST1H1A , HIST1H2BF, DDIT4L , BIRC5 14
0.1 UP INTERSECTION DHX58 , OAS2 , OAS1 , HIST1H2BB, EIF2AK2 , IFI35 , PARP9 , CMPK2 , IFITM3 , IFI27 , PLSCR1 , PARP14 , EPSTI1 , IFI44 , ETV7 , HSH2D , CFH , IRF9 , XAF1 , BATF2 , BST2 , MX2 , HRASLS2 , IFIT3 , TYMP , SAMD9L , CDCA7 , IFIT2 , SAMD9 , SAMHD1 , HERC5 , UBQLNL , IFITM1 , OAS3 , TRIM69 , SP110 , IDO1 , DDX58 , IFI6 , HELZ2 , OASL , IFIT1 , PSMB9 , TAP1 , HIST1H2BL, CTSO , HERC6 , LAMP3 , RTP4 , LAP3 , DDX60 , NCAPG , IFIT5 , STAT1 , PARP10 , HLA-B , PPM1K , IRF7 , GBP1 , USP18 , CKS2 , RSAD2 , MX1 , TF , DDX60L , IFI44L 66
0.1 UP IFNw TRANK1 , HIST1H2AD , ZNF625-ZNF20 , ANXA10 , C4A , STAT2 , CTD-2207O23.3 , SLC25A28 , PSMB8 , C19orf66 , HLA-A , HIST1H4I , HIST1H3G , HIST1H3J , SERPING1 , LRRN2 , APOL1 , UBQLN3 , TLR3 , HIST1H2BJ , CTD-2521M24.13, HSP90B1 , CASP1 , HIST1H2AG , HLA-E , GBP1P1 , ACE2 , CLDN23 , OR52V1P , TDRD7 , UBE2L6 , SLC15A3 , TNFSF13B , BTN3A3 , TRIM22 , LY6E , CTSL , HIST2H2AC , RP1-34B20.21 , MALT1 , ADAR , HIST1H2AJ , HIST2H2AB , PIK3AP1 , RMI2 , CARD16 , NMI , BTN3A2 , HIST2H2AA4 , HIST2H4A , TXNIP , OLFM5P , MTCH1 , GMPR , HIST1H3F , DTX3L , SMTNL1 , C4B , BTN3A1 , CRIP1 , BCL2L14 , TAGAP , TRIM21 , SP100 , OR52T1P , NEURL3 , HIST1H2AL , ISG20 , SLC16A1 , STC1 , HLA-C , HIST1H2AB , PLSCR2 , RP11-307N16.6 , FAM98A , NUDCD1 , PNPT1 , HIST1H4E , HLA-F , PARP12 , HIST1H2AM , PTPRR , SCO2 , HIST2H4B , HIST1H3C 85
0.1 DOWN HI7 EIF4BP2 , PER1 , CCL14 , DCAF1 , NDUFB4P2 , RP11-668G10.2 6
0.1 DOWN INTERSECTION ABC13-47488600E17.1, RP11-552F3.12 , SHISA2 , GRAMD1B , SLC5A5 , KRT87P , RP5-1092L12.2 , CYP26B1 , STATH , TFCP2L1 , NRP2 , CYP26A1 , HSD11B2 , ZBTB16 , MAN1C1 , CD55 16
0.1 DOWN IFNw C10orf113 , INO80 , GAREM2 , COX7A2 , INMT-FAM188B, PLEKHG1 , IL6 , PAICSP1 , BTBD16 , AQP5 , MYO1H , AC098826.5 , KRT16P2 , MUC6 , BACH2 , RP11-290L7.5, PAK3 , DAAM2 , CTSV , SLC26A2 , CDC42EP2 , MUC21 , SLC16A14 , PER2 , SOCS3 , PGAM1P5 , SNAPC4 , DMBT1 28
1.0 UP HI7 SERPINA3 , HIST1H2AJ, HIST1H2AL, B4GALNT4 , HIST1H2AD, HIST1H2AG, HIST2H2AB, HIST1H2BB, HIST1H2BO, HIST1H2AB, IGFBP6 , KRT16 , HIST2H4A , IL1RL1 , SPRR1B , CA9 , HIST2H3C , MUC5AC , CDCA7 , ECM1 , HIST1H3G , HIST1H3J , NUSAP1 , KIF4A , CLCA4 , MUC6 , PAG1 , HIST1H2AK, KRT6A , HIST1H3F , CLDN10 , HIST1H2BL, HIST1H2BI, ARFGEF2 , HIST1H2AM, MMP1 , NCAPG , ALOX15B , BHLHA15 , HIST2H2AC, LTF , CKS2 , MUC5B , BUB1 , AZIN2 , HIST1H2BF, NDUFA4L2 , HIST1H3C 48
1.0 UP INTERSECTION TRANK1 , OAS2 , CXCL6 , SAA2 , ANXA10 , IFI27 , C4A , EPSTI1 , IFI44 , ETV7 , OR52H1 , CTD-2207O23.3 , IRF9 , SLC25A28 , C19orf66 , IFIT3 , HLA-A , TYMP , SAMHD1 , HIST1H4I , IFITM1 , OAS3 , SP110 , DDX58 , SERPING1 , OASL , APOL1 , UBQLN3 , TAP1 , LAMP3 , LAP3 , DDX60 , HIST1H2BJ , IFIT5 , GBP1 , CTD-2521M24.13, HSP90B1 , TF , DDX60L , STUM , IFI35 , PARP9 , IFITM3 , GBP1P1 , PLSCR1 , PARP14 , OR52B5P , PATL2 , ACE2 , CLDN23 , TLDC2 , OR52V1P , UBE2L6 , HRASLS2 , SLC15A3 , IFIT2 , BTN3A3 , HERC5 , IDO1 , TRIM22 , LY6E , SEMA3D , CTSO , HERC6 , PARP10 , RP1-34B20.21 , MALT1 , EIF2AK2 , SAA1 , OR52H2P , RMI2 , APOL4 , NMI , BTN3A2 , HIST2H2AA4 , XAF1 , SAMD9L , OLFM5P , SAMD9 , UBQLNL , TRIM69 , GMPR , IFI6 , HELZ2 , SMTNL1 , IFIT1 , PSMB9 , C4B , BTN3A1 , RTP4 , STAT1 , HLA-B , PPM1K , TRIM21 , MX1 , KLK7 , OR52T1P , DHX58 , ISG20 , SLC16A1 , STC1 , OAS1 , CMPK2 , MMP13 , PLSCR2 , HSH2D , CFH , BATF2 , BST2 , MX2 , DUOX2 , HLA-F , PARP12 , PTPRR , TFF3 , IRF7 , USP18 , RSAD2 , HIST2H4B , IFI44L 120
1.0 UP IFNw CD274 , LIPA , RHOV , IFI30 , PTHLH , RORB , STARD5 , SBK1 , DUOXA2 , DRAP1 , ZNF625-ZNF20 , RBM43 , APOBEC3D , LRRC3 , AC093724.2 , APOL2 , PMAIP1 , STAT2 , GVINP1 , COL19A1 , PSMB8 , SHISA5 , AC127070.1 , TRIM14 , SPATS2L , TRIM6 , AC213203.1 , TNFSF10 , PSME2 , LAG3 , LRRN2 , RP1-102G20.4 , CR1L , FGD2 , TLR3 , SOCS1 , PTPRVP , RABGAP1L , BTC , GSDMB , RP11-686D22.3, LOX , PPM1J , CASP1 , CNP , XRN1 , HLA-E , OPTN , ATF3 , GBP5 , TAP2 , APOBEC3F , ODF3B , WARS , TRAFD1 , CD68 , KIF11 , B2M , TDRD7 , FBXO6 , TNFSF13B , PCSK9 , MCUB , FAM46A , NLRC5 , NUB1 , OGFR , CCDC103 , TOR1B , DDO , LGALS3BP , HIRA , OVOL1 , TMEM187 , PNCK , RBM11 , ZDHHC4P1 , ZNFX1 , MGAT3 , ZNF618 , ADAR , DPP10 , AKAP2 , PIK3AP1 , MYD88 , TSPAN2 , CXCL10 , CARD16 , SECTM1 , CD38 , TXNIP , VNN2 , ADAM17 , GPR143 , MLKL , MTCH1 , CFB , IFI16 , LMO2 , NT5C3A , DTX3L , C4orf33 , PML , UBA7 , SLFN12 , GRIP2 , RHBDF2 , BCL2L14 , MOB3C , APOBEC3G , NUPR1 , THAP12P7 , TAGAP , C7orf49 , TCF3P1 , CTD-2521M24.4, IL22RA1 , SP100 , GBP4 , HES4 , HLA-C , CMTR1 , SLFN5 , RP11-307N16.6, EXOC3L1 , FANCA , TRIM25 , KCTD15 , SOX9 , GLIPR1L1 , PNPT1 , TRIM5 , ZBP1 , APOL6 , APOL3 , OPRD1 , BBC3 , TREX1 , ALMS1P1 , SCO2 , ROR2 , ZNF775 , EFTUD1P1 143
1.0 DOWN HI7 VWCE , EIF3I , DCAF1 , STATH , SLIT3 , TMPRSS4-AS1, CYP24A1 , COX7A2 , TIGIT , CD55 10
1.0 DOWN INTERSECTION ABC13-47488600E17.1, AC098826.5 , PER1 , MAN1C1 , SHISA2 , RP11-552F3.12 , GRAMD1B , SLC5A5 , KRT87P , RP5-1092L12.2 , PCDHAC1 , DAAM2 , SLC16A14 , TFCP2L1 , CYP26A1 , HSD11B2 , ZBTB16 , INMT-FAM188B 18
1.0 DOWN IFNw C10orf113 , SLC13A2 , SMAD9 , RP11-334L9.1 , FCMR , KCNMA1 , CLCN5 , ANKRD36BP1 , PZP , SV2C , ERVMER34-1 , MYO1H , PPARGC1A , SMOX , ARHGDIB , CHST1 , PNMA6A , BACH2 , ABCA4 , BNIP3P4 , ZNF648 , RP11-460N11.2, NRCAM , CCDC110 , DMBT1 , ATP13A5 , SGSM1 , SLC26A2 , KLK13 , TMEM37 , SULT2B1 , ALDH1A3 32
10.0 UP HI7 SERPINA3 , HIST1H2AJ , HIST1H2AL , B4GALNT4 , HIST1H2AD , PTHLH , HIST1H2AG , HIST2H2AB , HIST1H2BB , HIST1H3C , HIST1H2BO , CXCL6 , HIST1H2AB , HIST1H4A , ACD , SERPINB4 , ISY1-RAB43, CLCA2 , HIST1H4C , KIF11 , FAM98A , KRT16 , HIST2H4A , IL1RL1 , SPRR1B , AC127070.1, CA9 , IFI27L1 , MUC5AC , HIST2H3C , CDCA7 , PCSK9 , ECM1 , HIST1H4I , HIST1H3G , CCDC103 , HIST1H3J , PAG1 , CLCA4 , KRT6A , HIST1H3F , CLDN10 , HIST1H2AK , KCTD15 , EGLN3 , HIST1H4E , HIST1H2BL , HIST1H2BI , LPCAT1 , TIMP1 , HIST1H2AM , MCAM , MMP1 , CPA4 , CTSL , NCAPG , ALOX15B , DSG3 , BHLHA15 , LTF , CKS2 , MUC5B , FAM83A , WNT7A , AIFM1 , NDUFA4L2 , STUM 67
10.0 UP INTERSECTION TRANK1 , LIPA , OAS2 , STARD5 , DUOXA2 , ZNF625-ZNF20 , SAA2 , ANXA10 , IFI27 , C4A , EPSTI1 , APOL2 , PMAIP1 , IFI44 , ETV7 , STAT2 , CTD-2207O23.3 , IRF9 , PSMB8 , SLC25A28 , C19orf66 , IFIT3 , HLA-A , TYMP , SAMHD1 , IFITM1 , OAS3 , SP110 , DDX58 , SERPING1 , LRRN2 , OASL , APOL1 , UBQLN3 , TAP1 , LAMP3 , TLR3 , LAP3 , DDX60 , HIST1H2BJ , IFIT5 , GBP1 , CTD-2521M24.13, HSP90B1 , TF , DDX60L , CASP1 , HLA-E , IFI35 , PARP9 , IFITM3 , GBP1P1 , PLSCR1 , PARP14 , PATL2 , ACE2 , CLDN23 , OR52V1P , TDRD7 , UBE2L6 , HRASLS2 , HLA-DPA1 , SLC15A3 , TNFSF13B , IFIT2 , BTN3A3 , HERC5 , IDO1 , TRIM22 , OVOL1 , LY6E , CTSO , HERC6 , PARP10 , HIST2H2AC , RP1-34B20.21 , MALT1 , EIF2AK2 , SAA1 , ADAR , RMI2 , APOL4 , NMI , BTN3A2 , HIST2H2AA4 , SECTM1 , XAF1 , TXNIP , VNN2 , SAMD9L , OLFM5P , SAMD9 , UBQLNL , TRIM69 , GMPR , CFB , IFI16 , IFI6 , HELZ2 , DTX3L , SMTNL1 , IFIT1 , C4orf33 , PML , PSMB9 , C4B , BTN3A1 , RTP4 , BCL2L14 , CHMP5 , HLA-B , PPM1K , STAT1 , TRIM21 , MX1 , SP100 , KLK7 , DHX58 , ISG20 , SLC16A1 , STC1 , OAS1 , CMPK2 , HLA-C , MMP13 , PLSCR2 , IGFBP6 , HSH2D , CFH , BATF2 , BST2 , MX2 , DUOX2 , NUDCD1 , PNPT1 , TRIM5 , HLA-F , PARP12 , PTPRR , TREX1 , TFF3 , IRF7 , USP18 , SCO2 , RSAD2 , HIST2H4B , IFI44L 147
10.0 UP IFNw CD274 , GPR132 , RP11-468E2.4 , TMEM268 , ENDOD1 , CEACAM1 , RORB , IFI30 , ENDOU , SBK1 , DRAP1 , APOBEC3D , LRRC3 , RBM43 , TMEM140 , CTC-209L16.1 , JADE2 , AC093724.2 , OR52H1 , HTR2B , GVINP1 , RET , COL19A1 , IL18BP , SHISA5 , ADGRF3 , FBXO39 , RBMS2 , UGT2B7 , PHLPP2 , TRIM14 , SPATS2L , TRIM6 , ARL11 , RPS6KC1 , TMSB10 , AC213203.1 , TMEM219 , RP11-403I13.10 , TNFSF10 , PSME2 , LAG3 , RP1-102G20.4 , MICE , PDSS1P1 , CR1L , TNFSF13 , TCHP , FAM231C , CDK18 , FAM122C , PLEKHN1 , FGD2 , SOCS1 , PTPRVP , RABGAP1L , HELB , BTC , GSDMB , RBCK1 , MTMR11 , RP11-686D22.3 , ITIH4 , LOX , ANO7L1 , PPM1J , TRIM31 , PSMA4 , CNP , HCG4 , SLC45A3 , STARD4 , XRN1 , RP11-345J18.2 , VSIG10L , HCAR3 , SYNPO2 , IL15RA , CCDC71L , OPTN , IL23A , FNDC3A , TAP2 , ATF3 , BLZF1 , COL16A1 , GBP5 , APOBEC3F , GBP6 , LGR6 , ODF3B , WARS , NCDN , HLA-J , OR52B5P , TRAFD1 , C2orf66 , CD68 , TLDC2 , B2M , SYCP2 , GATA3 , CYP21A2 , DYNLT1 , FBXO6 , KIF19 , ERAP2 , PLA1A , RP11-164J13.1 , FAM46A , NUB1 , OGFR , TOR1B , MCUB , NLRC5 , AC116366.6 , SUN2 , MUC3A , MVB12A , RASAL1 , PSMA2 , DDO , STX11 , LGALS3BP , HIRA , SLC1A3 , MICAL1 , TMEM187 , FXYD3 , E2F5 , LINC01296 , SEMA3D , CHRNB2 , RASGEF1B , C21orf91 , ITM2B , PRRT2 , RBM11 , ZDHHC4P1 , PI4K2B , JAK2 , PRRX2 , ZNFX1 , MGAT3 , RNF213 , CBLN3 , PSMB10 , BLOC1S3 , HCG4P3 , AZIN2 , ZNF618 , FGD3 , CAMK4 , DPP10 , COL9A2 , IGFBP4 , TMPRSS3 , ANO3 , AKAP2 , RP11-686D22.7 , USF1 , SLCO2A1 , GBP3 , WDR86 , SKIDA1 , MYD88 , PCGF5 , OR52H2P , PIK3AP1 , TSPAN2 , CXCL10 , TMEM229B , CARD16 , DLG2 , MICB , MASTL , USP30 , GLRX , OMG , MDK , CD38 , PARP3 , GCNT4 , ATRIP , ITPRIP , ADAM17 , TMEM139 , GPR143 , CCNA1 , TRIM38 , MLKL , MTCH1 , LMO2 , FAM65B , IFITM2 , CLEC7A , ZFYVE26 , NT5C3A , GMNC , SCARB2 , LGALS9 , C19orf38 , MDGA1 , UBA7 , GIMAP2 , SLFN12 , GRIP2 , RHBDF2 , PEX14 , KRT39 , IRF1 , MMAA , ABC7-42404400C24.1, PLEK2 , MOB3C , HDAC9 , APOBEC3G , NUPR1 , THAP12P7 , CX3CL1 , ACKR4 , TAGAP , C7orf49 , TCF3P1 , CTD-2521M24.4 , SCIN , IL22RA1 , PARP8 , LYPD5 , OR52T1P , GBP4 , RAB27B , OSR2 , ANKFY1 , AKAP7 , NEURL3 , CD37 , HDX , ABCD2 , CYP2J2 , M1AP , SLC38A5 , SPTSSB , HIST1H3H , GLIPR2 , THTPA , UNC93B1 , MUC13 , HES4 , CD47 , HFE , RP11-495P10.10 , SPOCD1 , TMEM106A , CMTR1 , AGRN , SLFN5 , HIST2H2BE , RP11-307N16.6 , FRMD3 , MTATP6P11 , PAOX , IKBKE , SLC27A3 , GTPBP1 , RNF114 , APOD , EXOC3L1 , MT2A , FANCA , RNF122 , LYSMD2 , TRIM25 , RNF43 , SOX9 , MEF2C , LIFR , GBP2 , AL078584.1 , ZBP1 , APOL6 , APOL3 , RNF19B , OPRD1 , ARFGEF2 , BBC3 , PGAP1 , NCEH1 , PRKD2 , SCLT1 , SLC6A16 , RP11-383G6.4 , RUBCN , SLC6A9 , ALMS1P1 , GCA , MUC15 , SYT15 , ABCD1 , AKAP3 , PLEKHG5 , EFTUD1P1 302