baySeq was run independently in Rolling and LCM samples Here I'm comparing how many of the C4 genes are differentially expressed in each sample
Read the data and merge with C4 gene annotations
agis <- read.csv("Cg_At_AGIs.csv", header=TRUE, sep=",", as.is=T)
agis2 <-agis[,c(1,7)]
C4TPMs <- read.csv("C4TPMs.csv", sep="\t",head=T, as.is=T)
colnames(C4TPMs)
## [1] "X" "gene" "AGI2" "CgTOT1" "CgTOT2" "CgTOT3"
## [7] "SE_TOT" "M1" "M2" "M3" "SE_M" "BS1"
## [13] "BS2" "BS3" "SE_BS" "LCM_M1" "LCM_M2" "LCM_M3"
## [19] "SE_LCMM" "LCM_BS1" "LCM_BS2" "LCM_BS3" "SE_LCMBS" "CgTOT"
## [25] "M" "BS" "LCM_M" "LCM_BS" "C4name.x" "Role.x"
## [31] "cell.x"
C4TPMs <-C4TPMs[, c(2,3,25:28,11,15,19,23)]
LCM <- read.delim("CgLCM_12sep.tsv",
header=T,
sep=",",
as.is=T)
LCM["ID"] <-LCM$X
LCM <-LCM[,c(-1,-2)]
rolling <- read.delim("CgRoll_12sep.tsv",
header=T,
sep=",",
as.is=T)
rolling["ID"] <-NA
rolling["ID"] <-rolling$X
rolling <-rolling[,c(-1,-2)]
C4 <- read.csv( "~/documents/data/TPMs/Final_Cleome_DE/C4gene_descriptions10sep.csv", header=TRUE, sep="\t", as.is=T)
C4LCM <- merge (x=C4, y=LCM, by.x="gene", by.y="ID",all=F)
colnames(C4LCM) <- c("gene","AGI","C4name","Role","cell","lcm_m1","lcm_m2","lcm_m3",
"lcm_bs1","lcm_bs2","lcm_bs3","Likelihood_lcm","DE_lcm","FDR_lcm")
C4roll <- merge (x=C4, y=rolling, by.x="gene", by.y="ID",all=F)
colnames(C4roll) <- c("gene","AGI","C4name","Role","cell","rol_m1","rol_m2","rol_m3",
"rol_bs1","rol_bs2","rol_bs3","Likelihood_rol","DE_rol","FDR_rol")
C4merge <- merge(x=C4LCM, y=C4roll, by.x="gene", by.y="gene",all=T)
C4merge <-C4merge[,c(-15:-18)]
C4merge[c("log2FC_LCM","log2FC_rol")] <-NA
C4merge$log2FC_LCM = log2((rowMeans(C4merge[,c(9:11)])/(rowMeans(C4merge[,c(6:8)]))))
C4merge$log2FC_rol = log2((rowMeans(C4merge[,c(18:20)])/(rowMeans(C4merge[,c(15:17)]))))
Merge data with TPMs
C4merge <- merge(x=C4merge, y=C4TPMs, by.x= "gene", by.y= "gene")
C4merge <- C4merge[,-26]
colnames(C4merge)<- c("gene","AGI","C4name","Role","cell","lcm_m1","lcm_m2","lcm_m3",
"lcm_bs1","lcm_bs2","lcm_bs3","Likelihood_lcm","DE_lcm","FDR_lcm",
"rol_m1","rol_m2","rol_m3","rol_bs1","rol_bs2","rol_bs3",
"Likelihood_rol","DE_rol","FDR_rol","log2FC_LCM","log2FC_rol","avgTPMs_rolM","avgTPMs_rolBS",
"avgTPMs_lcmM","avgTPMs_lcmBS","SE_rolM","SE_rolBS","SE_lcmM","SE_lcmBS")
write.csv(C4merge, "C4rol_lcm_13sep.csv")
Using a Likelihood of >= 75% to determine which C4 genes are differentially expressed
bothdif <-subset(C4merge, Likelihood_lcm >= 0.75 & Likelihood_rol >= 0.75,
select= c(C4name,cell,log2FC_LCM,log2FC_rol, Likelihood_lcm,
Likelihood_rol,avgTPMs_rolM,avgTPMs_rolBS,
avgTPMs_lcmM,avgTPMs_lcmBS,SE_rolM,SE_rolBS,SE_lcmM,SE_lcmBS))
write.csv(bothdiff, "bothdiff.csv")
## Error: object 'bothdiff' not found
bothdif
## C4name cell log2FC_LCM log2FC_rol Likelihood_lcm Likelihood_rol
## 6 CA1 M -1.6381 -2.858 0.9942 0.8367
## 8 RPI.2 BS 1.0459 -1.779 0.9934 0.8883
## 20 AdeK.1 M -1.0642 -2.162 0.9816 0.8453
## 21 AdeK.2 M -1.2818 -2.926 0.9430 0.9801
## 30 NADME2.2 BS 0.9255 3.473 0.9574 0.9990
## 32 cMDH M/BS 2.6554 6.005 0.9960 0.9933
## 34 FBA5 M/BS -1.7679 -3.721 0.9854 0.9973
## 37 PPDK M -1.0544 -1.647 0.8686 1.0000
## 39 NADME1.1 BS 3.1008 5.755 0.8951 0.9699
## avgTPMs_rolM avgTPMs_rolBS avgTPMs_lcmM avgTPMs_lcmBS SE_rolM
## 6 586.9000 140.66 2484.4033 843.413 3.050e+02
## 8 210.5167 106.47 290.2000 633.637 7.702e+01
## 20 911.4133 341.57 2146.8600 1084.143 2.055e+02
## 21 1157.4033 257.74 1595.8200 693.827 3.784e+02
## 30 2.8500 55.06 4.4333 8.947 1.361e-01
## 32 0.1067 17.61 0.6967 4.837 5.239e-02
## 34 147.1933 18.34 125.1500 38.693 7.673e+01
## 37 7038.7733 3839.71 3076.9600 1562.063 1.266e+03
## 39 0.7333 79.46 3.2600 29.013 2.099e-01
## SE_rolBS SE_lcmM SE_lcmBS
## 6 24.405 197.8750 43.0857
## 8 8.874 5.9806 18.5622
## 20 54.414 47.5454 69.7757
## 21 29.135 61.6092 62.9035
## 30 13.537 0.4863 0.4592
## 32 6.660 0.1041 1.0049
## 34 1.029 18.8515 5.1489
## 37 927.770 392.6989 158.2966
## 39 58.265 1.1943 5.6518
lcmdif <-subset(C4merge, Likelihood_lcm >= 0.75,
select= c(C4name,cell,log2FC_LCM, Likelihood_lcm,
avgTPMs_lcmM,avgTPMs_lcmBS,SE_lcmM,SE_lcmBS))
write.csv(lcmdif, "lcmdif.csv")
lcmdif
## C4name cell log2FC_LCM Likelihood_lcm avgTPMs_lcmM avgTPMs_lcmBS
## 1 RP1 M -1.1649 0.9701 10.5500 4.930
## 3 UCP5 BS 1.1334 0.9950 902.8567 2094.130
## 4 RPE M/BS 1.0491 0.9705 582.3667 1275.993
## 5 PEPC2 M -1.3406 0.9985 451.7167 188.740
## 6 CA1 M -1.6381 0.9942 2484.4033 843.413
## 8 RPI.2 BS 1.0459 0.9934 290.2000 633.637
## 11 PRK.1 BS 0.8311 0.9402 1.6800 3.177
## 13 ASP5 M/BS -1.0039 0.9955 65.1067 34.377
## 14 CUE1/PPT1.1 M -1.1427 0.8534 2359.6733 1128.010
## 15 PEPCkinase M -1.8460 0.9752 111.0533 32.443
## 16 FBA2.1 BS 1.0107 0.8393 494.6833 1055.110
## 18 FBA2.2 BS 0.8997 0.8501 144.1233 284.470
## 20 AdeK.1 M -1.0642 0.9816 2146.8600 1084.143
## 21 AdeK.2 M -1.2818 0.9430 1595.8200 693.827
## 26 TPT.2 M/BS 0.9677 0.9912 297.2867 613.420
## 28 HCEF1/FBPase BS 0.9392 0.9388 751.6400 1523.753
## 30 NADME2.2 BS 0.9255 0.9574 4.4333 8.947
## 32 cMDH M/BS 2.6554 0.9960 0.6967 4.837
## 34 FBA5 M/BS -1.7679 0.9854 125.1500 38.693
## 35 NADME2.3 BS 1.2223 0.9951 645.5400 1589.990
## 37 PPDK M -1.0544 0.8686 3076.9600 1562.063
## 39 NADME1.1 BS 3.1008 0.8951 3.2600 29.013
## 40 TKS1 BS 0.9910 0.9781 177.1167 372.817
## 41 RBCS1B BS -1.3527 0.9758 43.7767 18.127
## 54 CA2 M -1.4917 1.0000 8294.6667 3109.213
## 56 RPI.1 BS 1.6390 0.9942 6.1333 20.267
## SE_lcmM SE_lcmBS
## 1 0.4613 0.8890
## 3 33.8956 37.7978
## 4 13.5801 87.0063
## 5 23.3697 7.9255
## 6 197.8750 43.0857
## 8 5.9806 18.5622
## 11 0.1106 0.1284
## 13 4.0024 1.5633
## 14 70.8978 110.4967
## 15 16.0982 5.2849
## 16 9.6523 121.1466
## 18 1.4261 24.8403
## 20 47.5454 69.7757
## 21 61.6092 62.9035
## 26 12.1579 12.1072
## 28 23.8498 77.3013
## 30 0.4863 0.4592
## 32 0.1041 1.0049
## 34 18.8515 5.1489
## 35 16.2344 26.1232
## 37 392.6989 158.2966
## 39 1.1943 5.6518
## 40 4.6215 17.8603
## 41 4.6175 2.1873
## 54 760.3968 443.8278
## 56 1.0874 1.3711
roldif <-subset(C4merge, Likelihood_rol >= 0.75,
select= c(C4name,cell,log2FC_rol,
Likelihood_rol,avgTPMs_rolM,avgTPMs_rolBS,
SE_rolM,SE_rolBS))
write.csv(roldif, "roldif.csv")
roldif
## C4name cell log2FC_rol Likelihood_rol avgTPMs_rolM avgTPMs_rolBS
## 6 CA1 M -2.858 0.8367 586.9000 140.66
## 7 AMK2 M -1.224 0.7781 89.7600 66.05
## 8 RPI.2 BS -1.779 0.8883 210.5167 106.47
## 17 TIM M -2.051 0.9556 643.3000 263.31
## 19 BASS2 M -2.851 0.8562 2159.2300 514.45
## 20 AdeK.1 M -2.162 0.8453 911.4133 341.57
## 21 AdeK.2 M -2.926 0.9801 1157.4033 257.74
## 30 NADME2.2 BS 3.473 0.9990 2.8500 55.06
## 32 cMDH M/BS 6.005 0.9933 0.1067 17.61
## 34 FBA5 M/BS -3.721 0.9973 147.1933 18.34
## 37 PPDK M -1.647 1.0000 7038.7733 3839.71
## 39 NADME1.1 BS 5.755 0.9699 0.7333 79.46
## SE_rolM SE_rolBS
## 6 3.050e+02 24.405
## 7 7.221e+00 8.842
## 8 7.702e+01 8.874
## 17 1.034e+02 19.718
## 19 4.943e+02 92.830
## 20 2.055e+02 54.414
## 21 3.784e+02 29.135
## 30 1.361e-01 13.537
## 32 5.239e-02 6.660
## 34 7.673e+01 1.029
## 37 1.266e+03 927.770
## 39 2.099e-01 58.265