Filter by Pathway Genes, Sort, and Plot a Heatmap

Using scan() to read the gene list

# Load the junction table and the pathway gene list (one entry per line).
# The separator is spelled out: the previous `sep =,` passed an empty argument
# and only worked because read.csv() fell back to its default of ",".
df <- read.csv("M1SD6.csv", header = TRUE, sep = ",")
gene_set <- scan("BCR_pathway_genes.csv", what = "", sep = "\n")

# Check the type of the gene list; the output below shows a character vector.
class(gene_set)
[1] "character"

# Preview the first entries of the gene list.
head(gene_set)
[1] "BLNK"   "MAPK1"  "PTK2B"  "PIK3R1" "NFKBIE" "PDPK1" 

# Check the type of the table; the output below shows a data.frame.
class(df)
[1] "data.frame"

# Preview the table: an ID column, a GeneID column, and one numeric column
# per time point (X0h, X2h, X6h, X24h).
head(df)
               ID  GeneID     X0h      X2h X6h     X24h
1   chr4:39459302    RPL9  0.0000 100.0000 100 100.0000
2   chr9:86585735  HNRNPK  0.0000 100.0000 100 100.0000
3 chr10:103868784    LDB1 74.3590  97.7273 100  92.1875
4  chr13:31035825   HMGB1 93.4783  92.8571 100 100.0000
5 chr10:102747637 C10orf2 94.2029 100.0000 100 100.0000
6   chr1:78414599   FUBP1 96.9697  96.0000 100  97.2973

# Filter the junction table to the genes in the pathway list, rank the rows by
# the 6h value (highest first), save the ranked table, and draw it as a heatmap.

# Heatmap() comes from ComplexHeatmap and colorRamp2() from circlize; attach
# them here so the script also runs in a fresh R session.
library(ComplexHeatmap)
library(circlize)

# NOTE(review): machine-specific working directory. The relative file names
# below are resolved against it, so it is kept as is.
setwd("C:/Users/shi-lab/Documents/heatmap")

# The separator is spelled out: the previous `sep =,` passed an empty argument
# and only worked because read.csv() fell back to its default of ",".
df <- read.csv("M1SD6.csv", header = TRUE, sep = ",")
gene_set <- scan("BCR_pathway_genes.csv", what = "", sep = "\n")

# Keep only the rows whose gene is in the pathway list. One vectorised %in%
# test replaces the per-gene subset()/rbind() loop, which grew the data frame
# on every iteration. Rows with a missing GeneID are never selected.
value_cols <- c("X0h", "X2h", "X6h", "X24h")
in_pathway <- !is.na(df$GeneID) & df$GeneID %in% gene_set
data <- df[in_pathway, c("ID", "GeneID", value_cols)]

# Rank by the 6h value, highest first, without attach()/detach(). Ties are
# broken by the gene's position in the gene list and then by the original row
# order, which is the order the per-gene loop used to produce.
row_order <- order(-data$X6h, match(data$GeneID, gene_set))
sorted_data <- data[row_order, ]

# Drop rows that are exact duplicates of an earlier row.
sorted_data <- sorted_data[!duplicated(sorted_data), ]

# Save the ranked table while it still carries the ID and GeneID columns.
write.csv(sorted_data, "M1SD6_BCR_pathway_gene.csv")

# Move ID into the row names and keep only the numeric time-point columns.
rownames(sorted_data) <- sorted_data$ID
sorted_data <- sorted_data[, value_cols, drop = FALSE]

# Heatmap() works on a matrix, so convert explicitly instead of relying on an
# implicit coercion of the data frame. Clustering is switched off on both axes
# so the rows stay in ranked order and the columns in time order.
Heatmap(
  as.matrix(sorted_data),
  name = "percentage",
  col = colorRamp2(c(0, 100), c("blue", "red")),
  column_title = "M1SD6 ranking by the intron retention percentage of junctions at 6h\nwith BCR Pathway Gene Set",
  show_row_names = FALSE,
  cluster_columns = FALSE,
  cluster_rows = FALSE
)

Filter by Pathway Genes, Sort, and Plot a Heatmap

Rose Park

February 26, 2018

Using scan() to read the gene list