library(knitr)
In order to check the difference of the P-values with paired and non-paired samples, as well as between LUSC and LUAD. We conducted the following analysis.
first we should load our customed R function for t-test or wilcox test
library("stringr")
setwd("/hgcnt44fs/sguo/methylation")
## Error: cannot change working directory
PairWilPValue <- function(data, x1, x2) {
output <- matrix(NA, dim(data)[1], 4) # set output matrix ()
for (i in 1:dim(data)[1]) {
out <- data.frame()
if (all(!any(all(is.na(data[i, x1])), all(is.na(data[i, x2]))), sum(is.na(data[i,
])) < 0.5 * length(data[i, ]))) {
tmp1 <- try(wilcox.test(as.numeric(data[i, x1]), as.numeric(data[i,
x2]), paired = T, na.action = na.omit))
output[i, 1] <- tmp1$p.value
output[i, 2] <- median(as.numeric(data[i, x1])) - median(as.numeric(data[i,
x2]))
output[i, 3] <- median(as.numeric(data[i, x1]))
output[i, 4] <- median(as.numeric(data[i, x2]))
# print(i)
}
}
out <- cbind(rownames(data), output)
out
}
NonPairWilPValue <- function(data, x1, x2) {
output <- matrix(NA, dim(data)[1], 4) # set output matrix ()
for (i in 1:dim(data)[1]) {
out <- data.frame()
if (all(!any(all(is.na(data[i, x1])), all(is.na(data[i, x2]))), sum(is.na(data[i,
])) < 0.5 * length(data[i, ]))) {
tmp1 <- try(wilcox.test(as.numeric(data[i, x1]), as.numeric(data[i,
x2]), paired = F, na.action = na.omit))
output[i, 1] <- tmp1$p.value
output[i, 2] <- median(as.numeric(data[i, x1]), na.rm = T) - median(as.numeric(data[i,
x2]), na.rm = T)
output[i, 3] <- median(as.numeric(data[i, x1]), na.rm = T)
output[i, 4] <- median(as.numeric(data[i, x2]), na.rm = T)
# print(i)
}
}
out <- cbind(rownames(data), output)
out
}
PairttestPValue <- function(data, x1, x2) {
output <- matrix(NA, dim(data)[1], 4) # set output matrix ()
for (i in 1:dim(data)[1]) {
out <- data.frame()
if (all(!any(all(is.na(data[i, x1])), all(is.na(data[i, x2]))), sum(is.na(data[i,
])) < 0.5 * length(data[i, ]))) {
tmp1 <- try(t.test(as.numeric(data[i, x1]), as.numeric(data[i, x2]),
paired = T, na.action = na.omit))
output[i, 1] <- tmp1$p.value
output[i, 2] <- median(as.numeric(data[i, x1]), na.rm = T) - median(as.numeric(data[i,
x2]), na.rm = T)
output[i, 3] <- median(as.numeric(data[i, x1]), na.rm = T)
output[i, 4] <- median(as.numeric(data[i, x2]), na.rm = T)
# print(i)
}
}
out <- cbind(rownames(data), output)
out
}
NonPairttestPValue <- function(data, x1, x2) {
output <- matrix(NA, dim(data)[1], 4) # set output matrix ()
for (i in 1:dim(data)[1]) {
out <- data.frame()
if (all(!any(all(is.na(data[i, x1])), all(is.na(data[i, x2]))), sum(is.na(data[i,
])) < 0.5 * length(data[i, ]))) {
tmp1 <- try(t.test(as.numeric(data[i, x1]), as.numeric(data[i, x2]),
paired = F, na.action = na.omit))
output[i, 1] <- tmp1$p.value
output[i, 2] <- mean(as.numeric(data[i, x1])) - mean(as.numeric(data[i,
x2]))
output[i, 3] <- mean(as.numeric(data[i, x1]))
output[i, 4] <- mean(as.numeric(data[i, x2]))
# print(i)
}
}
out <- cbind(rownames(data), output)
out
}
load data collection function as the following:
DatacollectionByCancer <- function(cancer) {
rlt <- list()
result1 <- c()
result2 <- c()
P1 <- c()
P2 <- c()
sta1 <- c()
sta2 <- c()
pattern = paste("jhu-usc.edu_", cancer, ".*", sep = "")
print(pattern)
file = list.files(pattern = pattern)
idv <- unique(as.array(str_extract(file, "TCGA-[0-9|a-z|A-Z]*-[0-9|a-z|A-Z]*")))
idv1 <- as.array(str_extract(file, "TCGA-[0-9|a-z|A-Z]*-[0-9|a-z|A-Z]*-[0-9]*"))
pairidv <- c()
for (i in 1:length(idv)) {
t1 <- paste(idv[i], "-01", sep = "")
t2 <- paste(idv[i], "-11", sep = "")
if (all(any(grepl(t1, file)), any(grepl(t2, file)))) {
pairidv <- c(pairidv, t1, t2)
}
}
pairfile <- file[sapply(pairidv, function(x) {
x <- grep(x, file)
x[length(x)]
})]
allfile <- file[sapply(idv1, function(x) {
x <- grep(x, file)
x[length(x)]
})]
data1 <- c()
for (i in 1:length(pairfile)) {
tmp <- read.table(pairfile[i], head = T, sep = "\t", as.is = F, skip = 1) # tmp<-read.table(file[i],head=T,sep='\t',as.is=F)
data1 <- cbind(data1, tmp[, 2])
# print(c(i,cancer))
}
data2 <- c()
for (i in 1:length(allfile)) {
tmp <- read.table(allfile[i], head = T, sep = "\t", as.is = F, skip = 1) # tmp<-read.table(file[i],head=T,sep='\t',as.is=F)
data2 <- cbind(data2, tmp[, 2])
# print(c(i,cancer))
}
rownames(data1) = tmp[, 1]
colnames(data1) = pairidv
rownames(data2) = tmp[, 1]
colnames(data2) = idv1
file1 = paste(cancer, "_pair.RData")
file2 = paste(cancer, "_non_pair.RData")
save(data1, file = file1)
save(data2, file = file2)
rlt$pair <- data1
rlt$nonpair <- data2
rlt
}
load pair data set pvalue calcuation function as the following:
PairPvalueCalcuateByCancer <- function(data) {
# differential methylation calculation
rlt <- list()
data <- data + matrix(rnorm(length(data), 1e-05, 1e-06), dim(data)[1], dim(data)[2]) # row=gene, col=inv
type <- substr(colnames(data), 14, 15)
x1 <- which(type == "01") # type 1, cancer or sensitive
x2 <- which(type == "11") # type 2, normal or resistant
out1 <- PairWilPValue(data, x1, x2)
colnames(out1) <- c("GeneName", "Pvalue", "Statistic", "mean1", "mean2")
rlt$p <- out1[, 2]
rlt$sta <- out1[, 3]
rlt
}
PairPvalueCalcuateByCancerBylogistic <- function(data) {
# differential methylation calculation
rlt <- list()
data <- data + matrix(rnorm(length(data), 1e-05, 1e-06), dim(data)[1], dim(data)[2]) # row=gene, col=inv
type <- substr(colnames(data), 14, 15)
x1 <- which(type == "01") # type 1, cancer or sensitive
x2 <- which(type == "11") # type 2, normal or resistant
y <- abs(as.numeric(as.factor(type)) - 2)
output <- matrix(NA, dim(data)[1], 4) # set output matrix ()
for (i in 1:dim(data)[1]) {
if (all(!any(all(is.na(data[i, x1])), all(is.na(data[i, x2]))), sum(is.na(data[i,
])) < 0.5 * length(data[i, ]))) {
tmp1 <- summary(try(glm(y ~ data[i, ])))
output[i, 1] <- tmp1$coefficients[2, 4]
output[i, 2] <- median(as.numeric(data[i, x1])) - median(as.numeric(data[i,
x2]))
output[i, 3] <- median(as.numeric(data[i, x1]))
output[i, 4] <- median(as.numeric(data[i, x2]))
print(i)
}
}
out1 <- data.frame(Genename = rownames(data), output)
rlt$cpg <- out1[, 1]
rlt$p <- out1[, 2]
rlt$sta <- out1[, 3]
rlt$meth1 <- out1[, 4]
rlt$meth2 <- out1[, 5]
rlt
}
PairttestPValue <- function(data, x1, x2) {
output <- matrix(NA, dim(data)[1], 4) # set output matrix ()
for (i in 1:dim(data)[1]) {
out <- data.frame()
if (all(!any(all(is.na(data[i, x1])), all(is.na(data[i, x2]))), sum(is.na(data[i,
])) < 0.5 * length(data[i, ]))) {
tmp1 <- try(t.test(as.numeric(data[i, x1]), as.numeric(data[i, x2]),
paired = T, na.action = na.omit))
output[i, 1] <- tmp1$p.value
output[i, 2] <- mean(as.numeric(data[i, x1])) - mean(as.numeric(data[i,
x2]))
output[i, 3] <- mean(as.numeric(data[i, x1]))
output[i, 4] <- mean(as.numeric(data[i, x2]))
# print(i)
}
}
out <- cbind(rownames(data), output)
out
}
load non-pair data set pvalue calcuation function as the following:
NonPairwilcoxPvalueCalcuateByCancer <- function(data) {
# differential methylation calculation
rlt <- list()
data <- data + matrix(rnorm(length(data), 1e-05, 1e-06), dim(data)[1], dim(data)[2]) # row=gene, col=inv
type <- substr(colnames(data), 14, 15)
x1 <- which(type == "01") # type 1, cancer or sensitive
x2 <- which(type == "11") # type 2, normal or resistant
out1 <- NonPairWilPValue(data, x1, x2)
colnames(out1) <- c("GeneName", "Pvalue", "Statistic", "mean1", "mean2")
rlt$p <- out1[, 2]
rlt$sta <- out1[, 3]
rlt
}
2.1 Data Collection Process
# luad<-DatacollectionByCancer('LUAD') lusc<-DatacollectionByCancer('LUSC')
# dim(luad$data1) dim(luad$data2) dim(lusc$data1) dim(lusc$data2)
# colnames(luad$data1)[1:5] colnames(luad$data2)[1:5]
# colnames(lusc$data1)[1:5] colnames(lusc$data2)[1:5]
# rownames(luad$data1)[1:5] rownames(luad$data2)[1:5]
# rownames(lusc$data1)[1:5] rownames(lusc$data2)[1:5]
2.2 P-value calcualtion for pair and nonpair luad and lusc dataset.
list.files(pattern = "*.RData")
## [1] "luad.pair.pvalue.RData" "lungcancer.4.pvalue.RData"
## [3] "lusc.pair.pvalue.RData"
load("data_pair_luad.RData")
## Warning: cannot open compressed file 'data_pair_luad.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
ncol(data1)
## Error: object 'data1' not found
load("data_pair_lusc.RData")
## Warning: cannot open compressed file 'data_pair_lusc.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
ncol(data1)
## Error: object 'data1' not found
load("data_non_pair_luad.RData")
## Warning: cannot open compressed file 'data_non_pair_luad.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
ncol(data2)
## Error: object 'data2' not found
load("data_non_pair_lusc.RData")
## Warning: cannot open compressed file 'data_non_pair_lusc.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
ncol(data2)
## Error: object 'data2' not found
since we have collected the data before, therefore, here we load the previous data directly.
library("preprocessCore")
## Error: there is no package called 'preprocessCore'
list.files(pattern = "*.RData")
## [1] "luad.pair.pvalue.RData" "lungcancer.4.pvalue.RData"
## [3] "lusc.pair.pvalue.RData"
lusc <- list()
luad <- list()
load("data_pair_luad.RData")
## Warning: cannot open compressed file 'data_pair_luad.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
tmp <- data1
## Error: object 'data1' not found
luad$pair <- normalize.quantiles(tmp)
## Error: could not find function "normalize.quantiles"
colnames(luad$pair) = colnames(tmp)
## Error: object 'tmp' not found
rownames(luad$pair) = rownames(tmp)
## Error: object 'tmp' not found
load("data_non_pair_luad.RData")
## Warning: cannot open compressed file 'data_non_pair_luad.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
tmp <- data2
## Error: object 'data2' not found
luad$nonpair <- normalize.quantiles(tmp)
## Error: could not find function "normalize.quantiles"
colnames(luad$nonpair) = colnames(tmp)
## Error: object 'tmp' not found
rownames(luad$nonpair) = rownames(tmp)
## Error: object 'tmp' not found
load("data_pair_lusc.RData")
## Warning: cannot open compressed file 'data_pair_lusc.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
tmp <- data1
## Error: object 'data1' not found
lusc$pair = normalize.quantiles(tmp)
## Error: could not find function "normalize.quantiles"
colnames(lusc$pair) <- colnames(tmp)
## Error: object 'tmp' not found
rownames(lusc$pair) <- rownames(tmp)
## Error: object 'tmp' not found
load("data_non_pair_lusc.RData")
## Warning: cannot open compressed file 'data_non_pair_lusc.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
tmp <- data2
## Error: object 'data2' not found
lusc$nonpair = normalize.quantiles(tmp)
## Error: could not find function "normalize.quantiles"
colnames(lusc$nonpair) <- colnames(tmp)
## Error: object 'tmp' not found
rownames(lusc$nonpair) <- rownames(tmp)
## Error: object 'tmp' not found
2.2.1 P-value calcualtion for pair and nonpair luad and lusc dataset based on raw dataset
luad_pair_rlt <- PairPvalueCalcuateByCancer(luad$pair)
## Error: non-numeric matrix extent
lusc_pair_rlt <- PairPvalueCalcuateByCancer(lusc$pair)
## Error: non-numeric matrix extent
luad_nonpair_rlt <- NonPairwilcoxPvalueCalcuateByCancer(luad$nonpair)
## Error: non-numeric matrix extent
lusc_nonpair_rlt <- NonPairwilcoxPvalueCalcuateByCancer(lusc$nonpair)
## Error: non-numeric matrix extent
pvalue <- list()
pvalue$luad_pair_rlt = luad_pair_rlt
## Error: object 'luad_pair_rlt' not found
pvalue$lusc_pair_rlt = lusc_pair_rlt
## Error: object 'lusc_pair_rlt' not found
pvalue$luad_nonpair_rlt = luad_nonpair_rlt
## Error: object 'luad_nonpair_rlt' not found
pvalue$lusc_nonpair_rlt = lusc_nonpair_rlt
## Error: object 'lusc_nonpair_rlt' not found
save(pvalue, file = "lungcancer.4.pvalue.RData")
tmp1 <- na.omit(data.frame(p = -log(as.numeric(as.character(pvalue$luad_pair_rlt$p)),
10), sta = as.numeric(pvalue$luad_pair_rlt$sta)))
tmp2 <- na.omit(data.frame(p = -log(as.numeric(pvalue$lusc_pair_rlt$p), 10),
sta = as.numeric(pvalue$lusc_pair_rlt$sta)))
tmp3 <- na.omit(data.frame(p = -log(as.numeric(pvalue$luad_nonpair_rlt$p), 10),
sta = as.numeric(pvalue$luad_nonpair_rlt$sta)))
tmp4 <- na.omit(data.frame(p = -log(as.numeric(pvalue$lusc_nonpair_rlt$p), 10),
sta = as.numeric(pvalue$lusc_nonpair_rlt$sta)))
jpeg("diplot.pvalue.and.sta.1.jpg")
par(mfrow = c(2, 2), mar = c(5, 4, 4, 0.5))
plot(tmp1[, 2], tmp1[, 1], xlab = "methylation difference", ylab = "-log(P-value)",
cex = 0.1, main = "LUAD Paired Dataset", cex.main = 0.9)
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(v = -0.3, lty = 2, col = 2, lwd = 4)
abline(v = 0.3, lty = 2, col = 2, lwd = 4)
abline(h = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)
plot(tmp2[, 2], tmp2[, 1], cex = 0.1, xlab = "methylation difference", ylab = "",
main = "LUSC Paired Dataset", cex.main = 0.9)
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(v = -0.3, lty = 2, col = 2, lwd = 4)
abline(v = 0.3, lty = 2, col = 2, lwd = 4)
abline(h = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)
dev.off()
## pdf
## 2
jpeg("diplot.pvalue.and.sta.2.jpg")
par(mfrow = c(2, 2), mar = c(5, 4, 4, 0.5))
plot(tmp3[, 2], tmp3[, 1], xlab = "methylation difference", ylab = "-log(P-value)",
cex = 0.1, main = "LUAD Whole Dataset", cex.main = 0.9)
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(v = -0.3, lty = 2, col = 2, lwd = 4)
abline(v = 0.3, lty = 2, col = 2, lwd = 4)
abline(h = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)
plot(tmp4[, 2], tmp4[, 1], xlab = "methylation difference", ylab = "", cex = 0.1,
main = "LUSC Whole Dataset", cex.main = 0.9)
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(v = -0.3, lty = 2, col = 2, lwd = 4)
abline(v = 0.3, lty = 2, col = 2, lwd = 4)
abline(h = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)
dev.off()
## pdf
## 2
tmp1 <- (data.frame(p = -log(as.numeric(as.character(pvalue$luad_pair_rlt$p)),
10), sta = as.numeric(pvalue$luad_pair_rlt$sta)))
tmp2 <- (data.frame(p = -log(as.numeric(pvalue$lusc_pair_rlt$p), 10), sta = as.numeric(pvalue$lusc_pair_rlt$sta)))
tmp3 <- (data.frame(p = -log(as.numeric(pvalue$luad_nonpair_rlt$p), 10), sta = as.numeric(pvalue$luad_nonpair_rlt$sta)))
tmp4 <- (data.frame(p = -log(as.numeric(pvalue$lusc_nonpair_rlt$p), 10), sta = as.numeric(pvalue$lusc_nonpair_rlt$sta)))
tmpc1 <- data.frame(luad = tmp1[, 2], lusc = tmp2[, 2])
tmpc2 <- data.frame(luad = tmp3[, 2], lusc = tmp4[, 2])
jpeg("diplot.pvalue.and.sta.3.jpg")
par(mfrow = c(2, 2), mar = c(5, 4, 4, 0.5))
plot(tmpc1[, 1], tmpc1[, 2], xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma")
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(v = -0.3, lty = 2, col = 2, lwd = 4)
abline(v = 0.3, lty = 2, col = 2, lwd = 4)
abline(h = -0.3, lty = 2, col = 2, lwd = 4)
abline(h = 0.3, lty = 2, col = 2, lwd = 4)
plot(tmpc2[, 1], tmpc2[, 2], xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma")
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(v = -0.3, lty = 2, col = 2, lwd = 4)
abline(v = 0.3, lty = 2, col = 2, lwd = 4)
abline(h = -0.3, lty = 2, col = 2, lwd = 4)
abline(h = 0.3, lty = 2, col = 2, lwd = 4)
dev.off()
## pdf
## 2
tmpc1 <- data.frame(luad = tmp1[, 1], lusc = tmp2[, 1])
tmpc2 <- data.frame(luad = tmp3[, 1], lusc = tmp4[, 1])
jpeg("diplot.pvalue.and.sta.4.jpg")
par(mfrow = c(2, 2), mar = c(5, 4, 4, 0.5))
plot(tmpc1[, 1], tmpc1[, 2], xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma",
cex = 0.01, axes = T)
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(h = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)
abline(v = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)
plot(tmpc2[, 1], tmpc2[, 2], xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma",
cex = 0.01, axes = T)
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(h = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)
abline(v = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)
dev.off()
## pdf
## 2
tmpcc1 <- data.frame(luad = tmp1[, 1:2], lusc = tmp2[, 1:2])
tmcpp1 <- subset(tmpcc1, luad.p > -log(0.05/485577, 10) & lusc.p > -log(0.05/485577,
10) & abs(luad.sta) > 0.3 & abs(luad.sta) > 0.3)
tmp11 <- nrow(subset(tmp1, p > -log(0.05/485577, 10) & abs(sta) > 0.3))
tmp21 <- nrow(subset(tmp2, p > -log(0.05/485577, 10) & abs(sta) > 0.3))
tmp31 <- nrow(subset(tmp3, p > -log(0.05/485577, 10) & abs(sta) > 0.3))
tmp41 <- nrow(subset(tmp4, p > -log(0.05/485577, 10) & abs(sta) > 0.3))
tmp11
## [1] 0
tmp21
## [1] 0
tmp31
## [1] 0
tmp41
## [1] 0
there are tmp11 significant differential methylated genes (and the delta beta is great than 0.3) based on paired LUAD dataset
Figure 1A. P-value compare between LUAD and LUSC based on pair dataset
tiff("pvaluecompare.tiff")
par(mfrow = c(2, 2))
P1 <- luad_pair_rlt$p
## Error: object 'luad_pair_rlt' not found
P2 <- lusc_pair_rlt$p
## Error: object 'lusc_pair_rlt' not found
P11 <- log(as.numeric(P1), 10)
## Error: object 'P1' not found
P12 <- log(as.numeric(P2), 10)
## Error: object 'P2' not found
plot(P11, P12, cex = 0.4, xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma")
## Error: object 'P11' not found
abline(v = log(0.05/length(P11), 10), lty = 2, col = 2, lwd = 4)
## Error: object 'P11' not found
abline(h = log(0.05/length(P11), 10), lty = 2, col = 3, lwd = 4)
## Error: object 'P11' not found
Figure 1B. P-value compare between LUAD and LUSC based on non-pair dataset
P1 <- luad_nonpair_rlt$p
## Error: object 'luad_nonpair_rlt' not found
P2 <- lusc_nonpair_rlt$p
## Error: object 'lusc_nonpair_rlt' not found
P11 <- log(as.numeric(P1), 10)
## Error: object 'P1' not found
P12 <- log(as.numeric(P2), 10)
## Error: object 'P2' not found
plot(P11, P12, cex = 0.4, xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma")
## Error: object 'P11' not found
abline(v = log(0.05/length(P11), 10), lty = 2, col = 2, lwd = 4)
## Error: object 'P11' not found
abline(h = log(0.05/length(P11), 10), lty = 2, col = 3, lwd = 4)
## Error: object 'P11' not found
dev.off()
## tiff
## 3
Figure 1C. P-value compare between pair and non-pair based on LUAD
P1 <- luad_pair_rlt$p
## Error: object 'luad_pair_rlt' not found
P2 <- luad_nonpair_rlt$p
## Error: object 'luad_nonpair_rlt' not found
P11 <- log(as.numeric(P1), 10)
## Error: object 'P1' not found
P12 <- log(as.numeric(P2), 10)
## Error: object 'P2' not found
plot(P11, P12, cex = 0.4, xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma")
## Error: object 'P11' not found
abline(v = log(0.05/length(P11), 10), lty = 2, col = 2, lwd = 4)
## Error: object 'P11' not found
abline(h = log(0.05/length(P11), 10), lty = 2, col = 3, lwd = 4)
## Error: object 'P11' not found
Figure 1D. P-value compare between pair and non-pair based on LUSC
P1 <- lusc_pair_rlt$p
## Error: object 'lusc_pair_rlt' not found
P2 <- lusc_nonpair_rlt$p
## Error: object 'lusc_nonpair_rlt' not found
P11 <- log(as.numeric(P1), 10)
## Error: object 'P1' not found
P12 <- log(as.numeric(P2), 10)
## Error: object 'P2' not found
plot(P11, P12, cex = 0.4, xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma")
## Error: object 'P11' not found
abline(v = log(0.05/length(P11), 10), lty = 2, col = 2, lwd = 4)
## Error: object 'P11' not found
abline(h = log(0.05/length(P11), 10), lty = 2, col = 3, lwd = 4)
## Error: object 'P11' not found
2.2 P-value calcualtion for pair and nonpair luad and lusc dataset based on same sample size
ncol(luad$pair)
## NULL
ncol(lusc$pair)
## NULL
min <- min(ncol(luad$pair), ncol(lusc$pair))
## Warning: no non-missing arguments to min; returning Inf
min
## [1] Inf
luad_pair_rlt <- PairPvalueCalcuateByCancer(luad$pair[, 1:min])
## Error: result would be too long a vector
lusc_pair_rlt <- PairPvalueCalcuateByCancer(lusc$pair[, 1:min])
## Error: result would be too long a vector
P1 <- luad_pair_rlt$p
## Error: object 'luad_pair_rlt' not found
P2 <- lusc_pair_rlt$p
## Error: object 'lusc_pair_rlt' not found
P11 <- log(as.numeric(P1), 10)
## Error: object 'P1' not found
P12 <- log(as.numeric(P2), 10)
## Error: object 'P2' not found
jpeg("lusc.luad.n52.pvalue.jpeg")
plot(P11, P12, cex = 0.5, xlim = c(-10, 0), ylim = c(-10, 0), xlab = "Lung Adenocarcinoma",
ylab = "Lung Squamous Cell Carcinoma")
## Error: object 'P11' not found
abline(v = log(0.05/length(P11), 10), lty = 2, col = 2, lwd = 2)
## Error: object 'P11' not found
abline(h = log(0.05/length(P11), 10), lty = 2, col = 3, lwd = 2)
## Error: object 'P11' not found
dev.off()
## pdf
## 2
manhattan plot for lung adenocarcinoma and lung squamous cell carcinoma
pattern = paste("jhu-usc.edu_", "LUAD", ".*", sep = "")
print(pattern)
## [1] "jhu-usc.edu_LUAD.*"
file = list.files(pattern = pattern)
tmp <- read.table(file[1], head = T, sep = "\t", as.is = F, skip = 1) # tmp<-read.table(file[i],head=T,sep='\t',as.is=F)
## Warning: cannot open file 'NA': No such file or directory
## Error: cannot open the connection
sortanno <- read.table("annotation450.sort.txt", head = F, sep = "\t", as.is = T) # tmp<-read.table(file[i],head=T,sep='\t',as.is=F)
## Warning: cannot open file 'annotation450.sort.txt': No such file or
## directory
## Error: cannot open the connection
P1 <- luad_pair_rlt$p
## Error: object 'luad_pair_rlt' not found
P2 <- lusc_pair_rlt$p
## Error: object 'lusc_pair_rlt' not found
NP11 <- P1[match(sortanno[, 1], tmp[, 1])]
## Error: object 'P1' not found
NP12 <- P2[match(sortanno[, 1], tmp[, 1])]
## Error: object 'P2' not found
P1 <- as.numeric(luad_pair_rlt$p)
## Error: object 'luad_pair_rlt' not found
P2 <- as.numeric(lusc_pair_rlt$p)
## Error: object 'lusc_pair_rlt' not found
sta1 <- as.numeric(luad_pair_rlt$sta)
## Error: object 'luad_pair_rlt' not found
sta2 <- as.numeric(lusc_pair_rlt$sta)
## Error: object 'lusc_pair_rlt' not found
effectloci <- max(c(which(sortanno[, 2] == "X"), which(sortanno[, 2] == "Y"))) +
1
## Error: object 'sortanno' not found
region <- effectloci:nrow(sortanno)
## Error: object 'effectloci' not found
Man1 <- data.frame(SNP = sortanno[region, 1], CHR = as.character(sortanno[region,
2]), BP = sortanno[region, 3], P = NP11[region])
## Error: object 'sortanno' not found
Man2 <- data.frame(SNP = sortanno[region, 1], CHR = as.character(sortanno[region,
2]), BP = sortanno[region, 3], P = NP12[region])
## Error: object 'sortanno' not found
Man1$SNP = as.character(Man1$SNP)
## Error: object 'Man1' not found
Man1$CHR = as.numeric(as.character(Man1$CHR))
## Error: object 'Man1' not found
Man2$SNP = as.character(Man2$SNP)
## Error: object 'Man2' not found
Man2$CHR = as.numeric(as.character(Man2$CHR))
## Error: object 'Man2' not found
Man1 <- na.omit(Man1)
## Error: object 'Man1' not found
Man2 <- na.omit(Man2)
## Error: object 'Man2' not found
tiff("manhattan.tiff")
par <- par(mfrow = c(2, 1))
manhattan(Man1, main = "Manhattan Plot For Lung Adenocarcinoma", cex = 0.5,
cex.axis = 0.8, col = c("blue4", "orange3"), genomewideline = -log(0.01/485577,
10), suggestiveline = F, cex.main = 0.8)
## Error: could not find function "manhattan"
manhattan(Man2, main = "Manhattan Plot For Lung Squamous Cell Carcinoma", cex = 0.5,
cex.axis = 0.8, col = c("blue4", "orange3"), genomewideline = -log(0.01/485577,
10), suggestiveline = F, cex.main = 0.8)
## Error: could not find function "manhattan"
dev.off()
## pdf
## 2
Pvalue = data.frame(tmp[, 3], P)
## Error: object 'tmp' not found
colnames(Pvalue) = c("GeneSymbol", colnames(P))
## Error: object 'P' not found
rownames(Pvalue) <- rownames(data)
## Error: object 'Pvalue' not found
2.3 P-value calcualtion for pair and nonpair luad and lusc dataset based on same sample size by logitic regression
ncol(luad$pair)
## NULL
ncol(lusc$pair)
## NULL
min <- min(ncol(luad$pair), ncol(lusc$pair))
## Warning: no non-missing arguments to min; returning Inf
min
## [1] Inf
luad_pair_rlt <- PairPvalueCalcuateByCancerBylogistic(luad$pair[, 1:min])
## Error: result would be too long a vector
lusc_pair_rlt <- PairPvalueCalcuateByCancerBylogistic(lusc$pair[, 1:min])
## Error: result would be too long a vector
P1 <- luad_pair_rlt$p
## Error: object 'luad_pair_rlt' not found
P2 <- lusc_pair_rlt$p
## Error: object 'lusc_pair_rlt' not found
P11 <- log(as.numeric(P1), 10)
## Error: object 'P1' not found
P12 <- log(as.numeric(P2), 10)
## Error: object 'P2' not found
jpeg("lusc.luad.n52.pvalue.jpeg")
plot(P11, P12, cex = 0.5, xlim = c(-10, 0), ylim = c(-10, 0), xlab = "Lung adenocarcinoma",
ylab = "Lung Squamous Cell Carcinoma")
## Error: object 'P11' not found
abline(v = log(0.05/length(P11), 10), lty = 2, col = 2, lwd = 2)
## Error: object 'P11' not found
abline(h = log(0.05/length(P11), 10), lty = 2, col = 3, lwd = 2)
## Error: object 'P11' not found
dev.off()
## pdf
## 2
pattern = paste("jhu-usc.edu_", "LUAD", ".*", sep = "")
print(pattern)
## [1] "jhu-usc.edu_LUAD.*"
file = list.files(pattern = pattern)
tmp <- read.table(file[1], head = T, sep = "\t", as.is = F, skip = 1) # tmp<-read.table(file[i],head=T,sep='\t',as.is=F)
## Warning: cannot open file 'NA': No such file or directory
## Error: cannot open the connection
sortanno <- read.table("annotation450.sort.txt", head = F, sep = "\t", as.is = T) # tmp<-read.table(file[i],head=T,sep='\t',as.is=F)
## Warning: cannot open file 'annotation450.sort.txt': No such file or
## directory
## Error: cannot open the connection
NP11 <- P11[match(sortanno[, 1], tmp[, 1])]
## Error: object 'P11' not found
NP12 <- P12[match(sortanno[, 1], tmp[, 1])]
## Error: object 'P12' not found
effectloci <- max(c(which(sortanno[, 2] == "X"), which(sortanno[, 2] == "Y"))) +
1
## Error: object 'sortanno' not found
region <- effectloci:nrow(sortanno)
## Error: object 'effectloci' not found
Man1 <- data.frame(SNP = sortanno[region, 1], CHR = as.character(sortanno[region,
2]), BP = sortanno[region, 3], P = NP11[region])
## Error: object 'sortanno' not found
Man2 <- data.frame(SNP = sortanno[region, 1], CHR = as.character(sortanno[region,
2]), BP = sortanno[region, 3], P = NP12[region])
## Error: object 'sortanno' not found
Man1$SNP = as.character(Man1$SNP)
## Error: object 'Man1' not found
Man1$CHR = as.numeric(as.character(Man1$CHR))
## Error: object 'Man1' not found
Man1$P = 10^Man1$P
## Error: object 'Man1' not found
Man2$SNP = as.character(Man2$SNP)
## Error: object 'Man2' not found
Man2$CHR = as.numeric(as.character(Man2$CHR))
## Error: object 'Man2' not found
Man2$P = 10^Man2$P
## Error: object 'Man2' not found
Man1 <- na.omit(Man1)
## Error: object 'Man1' not found
Man2 <- na.omit(Man2)
## Error: object 'Man2' not found
tiff("manhattan.tiff")
par <- par(mfrow = c(2, 1))
manhattan(Man1, main = "Manhattan Plot", cex = 0.5, cex.axis = 0.8, col = c("blue4",
"orange3"), ymax = 8)
## Error: could not find function "manhattan"
manhattan(Man2, main = "Manhattan Plot", cex = 0.5, cex.axis = 0.8, col = c("blue4",
"orange3"), ymax = 8)
## Error: could not find function "manhattan"
dev.off()
## pdf
## 2
Pvalue = data.frame(tmp[, 3], P)
## Error: object 'tmp' not found
colnames(Pvalue) = c("GeneSymbol", colnames(P))
## Error: object 'P' not found
rownames(Pvalue) <- rownames(data)
## Error: object 'Pvalue' not found
P1 <- as.numeric(as.character(Pvalue[, 2]))
## Error: object 'Pvalue' not found
P2 <- as.numeric(as.character(Pvalue[, 5]))
## Error: object 'Pvalue' not found
P11 <- log(as.numeric(P1), 10)
## Error: object 'P1' not found
P12 <- log(as.numeric(P2), 10)
## Error: object 'P2' not found
# plot(P11,P12,cex=0.5,xlim=c(-10,0),ylim=c(-10,0),xlab='Lung
# adenocarcinoma',ylab='Lung Squamous Cell Carcinoma')
jpeg("lusc.luad.total.pvalue.jpeg")
plot(P11, P12, cex = 0.1, xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma")
## Error: object 'P11' not found
abline(v = log(0.05/length(P11), 10), lty = 2, col = 2, lwd = 4)
## Error: object 'P11' not found
abline(h = log(0.05/length(P11), 10), lty = 2, col = 3, lwd = 4)
## Error: object 'P11' not found
dev.off()
## pdf
## 2
plot(1:10, col = 1:8)