library(knitr)

In order to check the difference of the P-values with paired and non-paired samples, as well as between LUSC and LUAD. We conducted the following analysis.

first we should load our customed R function for t-test or wilcox test

library("stringr")
setwd("/hgcnt44fs/sguo/methylation")
## Error: cannot change working directory

PairWilPValue <- function(data, x1, x2) {
    output <- matrix(NA, dim(data)[1], 4)  # set output matrix ()
    for (i in 1:dim(data)[1]) {
        out <- data.frame()
        if (all(!any(all(is.na(data[i, x1])), all(is.na(data[i, x2]))), sum(is.na(data[i, 
            ])) < 0.5 * length(data[i, ]))) {
            tmp1 <- try(wilcox.test(as.numeric(data[i, x1]), as.numeric(data[i, 
                x2]), paired = T, na.action = na.omit))
            output[i, 1] <- tmp1$p.value
            output[i, 2] <- median(as.numeric(data[i, x1])) - median(as.numeric(data[i, 
                x2]))
            output[i, 3] <- median(as.numeric(data[i, x1]))
            output[i, 4] <- median(as.numeric(data[i, x2]))
            # print(i)
        }
    }
    out <- cbind(rownames(data), output)
    out
}

NonPairWilPValue <- function(data, x1, x2) {
    output <- matrix(NA, dim(data)[1], 4)  # set output matrix ()
    for (i in 1:dim(data)[1]) {
        out <- data.frame()
        if (all(!any(all(is.na(data[i, x1])), all(is.na(data[i, x2]))), sum(is.na(data[i, 
            ])) < 0.5 * length(data[i, ]))) {
            tmp1 <- try(wilcox.test(as.numeric(data[i, x1]), as.numeric(data[i, 
                x2]), paired = F, na.action = na.omit))
            output[i, 1] <- tmp1$p.value
            output[i, 2] <- median(as.numeric(data[i, x1]), na.rm = T) - median(as.numeric(data[i, 
                x2]), na.rm = T)
            output[i, 3] <- median(as.numeric(data[i, x1]), na.rm = T)
            output[i, 4] <- median(as.numeric(data[i, x2]), na.rm = T)
            # print(i)
        }
    }
    out <- cbind(rownames(data), output)
    out
}

PairttestPValue <- function(data, x1, x2) {
    output <- matrix(NA, dim(data)[1], 4)  # set output matrix ()
    for (i in 1:dim(data)[1]) {
        out <- data.frame()
        if (all(!any(all(is.na(data[i, x1])), all(is.na(data[i, x2]))), sum(is.na(data[i, 
            ])) < 0.5 * length(data[i, ]))) {
            tmp1 <- try(t.test(as.numeric(data[i, x1]), as.numeric(data[i, x2]), 
                paired = T, na.action = na.omit))
            output[i, 1] <- tmp1$p.value
            output[i, 2] <- median(as.numeric(data[i, x1]), na.rm = T) - median(as.numeric(data[i, 
                x2]), na.rm = T)
            output[i, 3] <- median(as.numeric(data[i, x1]), na.rm = T)
            output[i, 4] <- median(as.numeric(data[i, x2]), na.rm = T)
            # print(i)
        }
    }
    out <- cbind(rownames(data), output)
    out
}

NonPairttestPValue <- function(data, x1, x2) {
    output <- matrix(NA, dim(data)[1], 4)  # set output matrix ()
    for (i in 1:dim(data)[1]) {
        out <- data.frame()
        if (all(!any(all(is.na(data[i, x1])), all(is.na(data[i, x2]))), sum(is.na(data[i, 
            ])) < 0.5 * length(data[i, ]))) {
            tmp1 <- try(t.test(as.numeric(data[i, x1]), as.numeric(data[i, x2]), 
                paired = F, na.action = na.omit))
            output[i, 1] <- tmp1$p.value
            output[i, 2] <- mean(as.numeric(data[i, x1])) - mean(as.numeric(data[i, 
                x2]))
            output[i, 3] <- mean(as.numeric(data[i, x1]))
            output[i, 4] <- mean(as.numeric(data[i, x2]))
            # print(i)
        }
    }
    out <- cbind(rownames(data), output)
    out
}

load data collection function as the following:

DatacollectionByCancer <- function(cancer) {
    rlt <- list()
    result1 <- c()
    result2 <- c()
    P1 <- c()
    P2 <- c()
    sta1 <- c()
    sta2 <- c()

    pattern = paste("jhu-usc.edu_", cancer, ".*", sep = "")
    print(pattern)
    file = list.files(pattern = pattern)
    idv <- unique(as.array(str_extract(file, "TCGA-[0-9|a-z|A-Z]*-[0-9|a-z|A-Z]*")))
    idv1 <- as.array(str_extract(file, "TCGA-[0-9|a-z|A-Z]*-[0-9|a-z|A-Z]*-[0-9]*"))
    pairidv <- c()
    for (i in 1:length(idv)) {
        t1 <- paste(idv[i], "-01", sep = "")
        t2 <- paste(idv[i], "-11", sep = "")
        if (all(any(grepl(t1, file)), any(grepl(t2, file)))) {
            pairidv <- c(pairidv, t1, t2)
        }
    }
    pairfile <- file[sapply(pairidv, function(x) {
        x <- grep(x, file)
        x[length(x)]
    })]
    allfile <- file[sapply(idv1, function(x) {
        x <- grep(x, file)
        x[length(x)]
    })]

    data1 <- c()
    for (i in 1:length(pairfile)) {
        tmp <- read.table(pairfile[i], head = T, sep = "\t", as.is = F, skip = 1)  # tmp<-read.table(file[i],head=T,sep='\t',as.is=F)
        data1 <- cbind(data1, tmp[, 2])
        # print(c(i,cancer))
    }

    data2 <- c()
    for (i in 1:length(allfile)) {
        tmp <- read.table(allfile[i], head = T, sep = "\t", as.is = F, skip = 1)  # tmp<-read.table(file[i],head=T,sep='\t',as.is=F)
        data2 <- cbind(data2, tmp[, 2])
        # print(c(i,cancer))
    }

    rownames(data1) = tmp[, 1]
    colnames(data1) = pairidv
    rownames(data2) = tmp[, 1]
    colnames(data2) = idv1

    file1 = paste(cancer, "_pair.RData")
    file2 = paste(cancer, "_non_pair.RData")

    save(data1, file = file1)
    save(data2, file = file2)

    rlt$pair <- data1
    rlt$nonpair <- data2

    rlt
}

load pair data set pvalue calcuation function as the following:

PairPvalueCalcuateByCancer <- function(data) {
    # differential methylation calculation
    rlt <- list()
    data <- data + matrix(rnorm(length(data), 1e-05, 1e-06), dim(data)[1], dim(data)[2])  # row=gene, col=inv
    type <- substr(colnames(data), 14, 15)
    x1 <- which(type == "01")  # type 1, cancer or sensitive
    x2 <- which(type == "11")  # type 2, normal or resistant
    out1 <- PairWilPValue(data, x1, x2)
    colnames(out1) <- c("GeneName", "Pvalue", "Statistic", "mean1", "mean2")
    rlt$p <- out1[, 2]
    rlt$sta <- out1[, 3]
    rlt
}


PairPvalueCalcuateByCancerBylogistic <- function(data) {
    # differential methylation calculation
    rlt <- list()
    data <- data + matrix(rnorm(length(data), 1e-05, 1e-06), dim(data)[1], dim(data)[2])  # row=gene, col=inv
    type <- substr(colnames(data), 14, 15)
    x1 <- which(type == "01")  # type 1, cancer or sensitive
    x2 <- which(type == "11")  # type 2, normal or resistant
    y <- abs(as.numeric(as.factor(type)) - 2)
    output <- matrix(NA, dim(data)[1], 4)  # set output matrix ()
    for (i in 1:dim(data)[1]) {
        if (all(!any(all(is.na(data[i, x1])), all(is.na(data[i, x2]))), sum(is.na(data[i, 
            ])) < 0.5 * length(data[i, ]))) {
            tmp1 <- summary(try(glm(y ~ data[i, ])))
            output[i, 1] <- tmp1$coefficients[2, 4]
            output[i, 2] <- median(as.numeric(data[i, x1])) - median(as.numeric(data[i, 
                x2]))
            output[i, 3] <- median(as.numeric(data[i, x1]))
            output[i, 4] <- median(as.numeric(data[i, x2]))
            print(i)
        }
    }
    out1 <- data.frame(Genename = rownames(data), output)
    rlt$cpg <- out1[, 1]
    rlt$p <- out1[, 2]
    rlt$sta <- out1[, 3]
    rlt$meth1 <- out1[, 4]
    rlt$meth2 <- out1[, 5]

    rlt
}

PairttestPValue <- function(data, x1, x2) {
    output <- matrix(NA, dim(data)[1], 4)  # set output matrix ()
    for (i in 1:dim(data)[1]) {
        out <- data.frame()
        if (all(!any(all(is.na(data[i, x1])), all(is.na(data[i, x2]))), sum(is.na(data[i, 
            ])) < 0.5 * length(data[i, ]))) {
            tmp1 <- try(t.test(as.numeric(data[i, x1]), as.numeric(data[i, x2]), 
                paired = T, na.action = na.omit))
            output[i, 1] <- tmp1$p.value
            output[i, 2] <- mean(as.numeric(data[i, x1])) - mean(as.numeric(data[i, 
                x2]))
            output[i, 3] <- mean(as.numeric(data[i, x1]))
            output[i, 4] <- mean(as.numeric(data[i, x2]))
            # print(i)
        }
    }
    out <- cbind(rownames(data), output)
    out
}

load non-pair data set pvalue calcuation function as the following:

NonPairwilcoxPvalueCalcuateByCancer <- function(data) {
    # differential methylation calculation
    rlt <- list()
    data <- data + matrix(rnorm(length(data), 1e-05, 1e-06), dim(data)[1], dim(data)[2])  # row=gene, col=inv
    type <- substr(colnames(data), 14, 15)
    x1 <- which(type == "01")  # type 1, cancer or sensitive
    x2 <- which(type == "11")  # type 2, normal or resistant
    out1 <- NonPairWilPValue(data, x1, x2)
    colnames(out1) <- c("GeneName", "Pvalue", "Statistic", "mean1", "mean2")
    rlt$p <- out1[, 2]
    rlt$sta <- out1[, 3]
    rlt
}
  1. Result

2.1 Data Collection Process

# luad<-DatacollectionByCancer('LUAD') lusc<-DatacollectionByCancer('LUSC')
# dim(luad$data1) dim(luad$data2) dim(lusc$data1) dim(lusc$data2)

# colnames(luad$data1)[1:5] colnames(luad$data2)[1:5]
# colnames(lusc$data1)[1:5] colnames(lusc$data2)[1:5]

# rownames(luad$data1)[1:5] rownames(luad$data2)[1:5]
# rownames(lusc$data1)[1:5] rownames(lusc$data2)[1:5]

2.2 P-value calcualtion for pair and nonpair luad and lusc dataset.

list.files(pattern = "*.RData")
## [1] "luad.pair.pvalue.RData"    "lungcancer.4.pvalue.RData"
## [3] "lusc.pair.pvalue.RData"
load("data_pair_luad.RData")
## Warning: cannot open compressed file 'data_pair_luad.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
ncol(data1)
## Error: object 'data1' not found
load("data_pair_lusc.RData")
## Warning: cannot open compressed file 'data_pair_lusc.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
ncol(data1)
## Error: object 'data1' not found
load("data_non_pair_luad.RData")
## Warning: cannot open compressed file 'data_non_pair_luad.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
ncol(data2)
## Error: object 'data2' not found
load("data_non_pair_lusc.RData")
## Warning: cannot open compressed file 'data_non_pair_lusc.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
ncol(data2)
## Error: object 'data2' not found

since we have collected the data before, therefore, here we load the previous data directly.

library("preprocessCore")
## Error: there is no package called 'preprocessCore'
list.files(pattern = "*.RData")
## [1] "luad.pair.pvalue.RData"    "lungcancer.4.pvalue.RData"
## [3] "lusc.pair.pvalue.RData"
lusc <- list()
luad <- list()
load("data_pair_luad.RData")
## Warning: cannot open compressed file 'data_pair_luad.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
tmp <- data1
## Error: object 'data1' not found
luad$pair <- normalize.quantiles(tmp)
## Error: could not find function "normalize.quantiles"
colnames(luad$pair) = colnames(tmp)
## Error: object 'tmp' not found
rownames(luad$pair) = rownames(tmp)
## Error: object 'tmp' not found

load("data_non_pair_luad.RData")
## Warning: cannot open compressed file 'data_non_pair_luad.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
tmp <- data2
## Error: object 'data2' not found
luad$nonpair <- normalize.quantiles(tmp)
## Error: could not find function "normalize.quantiles"
colnames(luad$nonpair) = colnames(tmp)
## Error: object 'tmp' not found
rownames(luad$nonpair) = rownames(tmp)
## Error: object 'tmp' not found

load("data_pair_lusc.RData")
## Warning: cannot open compressed file 'data_pair_lusc.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
tmp <- data1
## Error: object 'data1' not found
lusc$pair = normalize.quantiles(tmp)
## Error: could not find function "normalize.quantiles"
colnames(lusc$pair) <- colnames(tmp)
## Error: object 'tmp' not found
rownames(lusc$pair) <- rownames(tmp)
## Error: object 'tmp' not found

load("data_non_pair_lusc.RData")
## Warning: cannot open compressed file 'data_non_pair_lusc.RData', probable
## reason 'No such file or directory'
## Error: cannot open the connection
tmp <- data2
## Error: object 'data2' not found
lusc$nonpair = normalize.quantiles(tmp)
## Error: could not find function "normalize.quantiles"
colnames(lusc$nonpair) <- colnames(tmp)
## Error: object 'tmp' not found
rownames(lusc$nonpair) <- rownames(tmp)
## Error: object 'tmp' not found

2.2.1 P-value calcualtion for pair and nonpair luad and lusc dataset based on raw dataset

luad_pair_rlt <- PairPvalueCalcuateByCancer(luad$pair)
## Error: non-numeric matrix extent
lusc_pair_rlt <- PairPvalueCalcuateByCancer(lusc$pair)
## Error: non-numeric matrix extent
luad_nonpair_rlt <- NonPairwilcoxPvalueCalcuateByCancer(luad$nonpair)
## Error: non-numeric matrix extent
lusc_nonpair_rlt <- NonPairwilcoxPvalueCalcuateByCancer(lusc$nonpair)
## Error: non-numeric matrix extent

pvalue <- list()
pvalue$luad_pair_rlt = luad_pair_rlt
## Error: object 'luad_pair_rlt' not found
pvalue$lusc_pair_rlt = lusc_pair_rlt
## Error: object 'lusc_pair_rlt' not found
pvalue$luad_nonpair_rlt = luad_nonpair_rlt
## Error: object 'luad_nonpair_rlt' not found
pvalue$lusc_nonpair_rlt = lusc_nonpair_rlt
## Error: object 'lusc_nonpair_rlt' not found

save(pvalue, file = "lungcancer.4.pvalue.RData")

tmp1 <- na.omit(data.frame(p = -log(as.numeric(as.character(pvalue$luad_pair_rlt$p)), 
    10), sta = as.numeric(pvalue$luad_pair_rlt$sta)))
tmp2 <- na.omit(data.frame(p = -log(as.numeric(pvalue$lusc_pair_rlt$p), 10), 
    sta = as.numeric(pvalue$lusc_pair_rlt$sta)))
tmp3 <- na.omit(data.frame(p = -log(as.numeric(pvalue$luad_nonpair_rlt$p), 10), 
    sta = as.numeric(pvalue$luad_nonpair_rlt$sta)))
tmp4 <- na.omit(data.frame(p = -log(as.numeric(pvalue$lusc_nonpair_rlt$p), 10), 
    sta = as.numeric(pvalue$lusc_nonpair_rlt$sta)))

jpeg("diplot.pvalue.and.sta.1.jpg")
par(mfrow = c(2, 2), mar = c(5, 4, 4, 0.5))
plot(tmp1[, 2], tmp1[, 1], xlab = "methylation difference", ylab = "-log(P-value)", 
    cex = 0.1, main = "LUAD Paired Dataset", cex.main = 0.9)
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(v = -0.3, lty = 2, col = 2, lwd = 4)
abline(v = 0.3, lty = 2, col = 2, lwd = 4)
abline(h = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)

plot(tmp2[, 2], tmp2[, 1], cex = 0.1, xlab = "methylation difference", ylab = "", 
    main = "LUSC Paired Dataset", cex.main = 0.9)
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(v = -0.3, lty = 2, col = 2, lwd = 4)
abline(v = 0.3, lty = 2, col = 2, lwd = 4)
abline(h = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)
dev.off()
## pdf 
##   2

jpeg("diplot.pvalue.and.sta.2.jpg")
par(mfrow = c(2, 2), mar = c(5, 4, 4, 0.5))
plot(tmp3[, 2], tmp3[, 1], xlab = "methylation difference", ylab = "-log(P-value)", 
    cex = 0.1, main = "LUAD Whole Dataset", cex.main = 0.9)
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(v = -0.3, lty = 2, col = 2, lwd = 4)
abline(v = 0.3, lty = 2, col = 2, lwd = 4)
abline(h = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)

plot(tmp4[, 2], tmp4[, 1], xlab = "methylation difference", ylab = "", cex = 0.1, 
    main = "LUSC Whole Dataset", cex.main = 0.9)
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(v = -0.3, lty = 2, col = 2, lwd = 4)
abline(v = 0.3, lty = 2, col = 2, lwd = 4)
abline(h = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)
dev.off()
## pdf 
##   2
tmp1 <- (data.frame(p = -log(as.numeric(as.character(pvalue$luad_pair_rlt$p)), 
    10), sta = as.numeric(pvalue$luad_pair_rlt$sta)))
tmp2 <- (data.frame(p = -log(as.numeric(pvalue$lusc_pair_rlt$p), 10), sta = as.numeric(pvalue$lusc_pair_rlt$sta)))
tmp3 <- (data.frame(p = -log(as.numeric(pvalue$luad_nonpair_rlt$p), 10), sta = as.numeric(pvalue$luad_nonpair_rlt$sta)))
tmp4 <- (data.frame(p = -log(as.numeric(pvalue$lusc_nonpair_rlt$p), 10), sta = as.numeric(pvalue$lusc_nonpair_rlt$sta)))

tmpc1 <- data.frame(luad = tmp1[, 2], lusc = tmp2[, 2])
tmpc2 <- data.frame(luad = tmp3[, 2], lusc = tmp4[, 2])

jpeg("diplot.pvalue.and.sta.3.jpg")
par(mfrow = c(2, 2), mar = c(5, 4, 4, 0.5))
plot(tmpc1[, 1], tmpc1[, 2], xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma")
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(v = -0.3, lty = 2, col = 2, lwd = 4)
abline(v = 0.3, lty = 2, col = 2, lwd = 4)
abline(h = -0.3, lty = 2, col = 2, lwd = 4)
abline(h = 0.3, lty = 2, col = 2, lwd = 4)

plot(tmpc2[, 1], tmpc2[, 2], xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma")
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(v = -0.3, lty = 2, col = 2, lwd = 4)
abline(v = 0.3, lty = 2, col = 2, lwd = 4)
abline(h = -0.3, lty = 2, col = 2, lwd = 4)
abline(h = 0.3, lty = 2, col = 2, lwd = 4)
dev.off()
## pdf 
##   2

tmpc1 <- data.frame(luad = tmp1[, 1], lusc = tmp2[, 1])
tmpc2 <- data.frame(luad = tmp3[, 1], lusc = tmp4[, 1])

jpeg("diplot.pvalue.and.sta.4.jpg")
par(mfrow = c(2, 2), mar = c(5, 4, 4, 0.5))
plot(tmpc1[, 1], tmpc1[, 2], xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma", 
    cex = 0.01, axes = T)
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(h = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)
abline(v = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)
plot(tmpc2[, 1], tmpc2[, 2], xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma", 
    cex = 0.01, axes = T)
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Error: need finite 'xlim' values
abline(h = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)
abline(v = -log(0.05/485577, 10), lty = 2, col = 3, lwd = 4)
dev.off()
## pdf 
##   2
tmpcc1 <- data.frame(luad = tmp1[, 1:2], lusc = tmp2[, 1:2])
tmcpp1 <- subset(tmpcc1, luad.p > -log(0.05/485577, 10) & lusc.p > -log(0.05/485577, 
    10) & abs(luad.sta) > 0.3 & abs(luad.sta) > 0.3)
tmp11 <- nrow(subset(tmp1, p > -log(0.05/485577, 10) & abs(sta) > 0.3))
tmp21 <- nrow(subset(tmp2, p > -log(0.05/485577, 10) & abs(sta) > 0.3))
tmp31 <- nrow(subset(tmp3, p > -log(0.05/485577, 10) & abs(sta) > 0.3))
tmp41 <- nrow(subset(tmp4, p > -log(0.05/485577, 10) & abs(sta) > 0.3))
tmp11
## [1] 0
tmp21
## [1] 0
tmp31
## [1] 0
tmp41
## [1] 0

there are tmp11 significant differential methylated genes (and the delta beta is great than 0.3) based on paired LUAD dataset

Figure 1A. P-value compare between LUAD and LUSC based on pair dataset

tiff("pvaluecompare.tiff")
par(mfrow = c(2, 2))
P1 <- luad_pair_rlt$p
## Error: object 'luad_pair_rlt' not found
P2 <- lusc_pair_rlt$p
## Error: object 'lusc_pair_rlt' not found
P11 <- log(as.numeric(P1), 10)
## Error: object 'P1' not found
P12 <- log(as.numeric(P2), 10)
## Error: object 'P2' not found
plot(P11, P12, cex = 0.4, xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma")
## Error: object 'P11' not found
abline(v = log(0.05/length(P11), 10), lty = 2, col = 2, lwd = 4)
## Error: object 'P11' not found
abline(h = log(0.05/length(P11), 10), lty = 2, col = 3, lwd = 4)
## Error: object 'P11' not found

Figure 1B. P-value compare between LUAD and LUSC based on non-pair dataset

P1 <- luad_nonpair_rlt$p
## Error: object 'luad_nonpair_rlt' not found
P2 <- lusc_nonpair_rlt$p
## Error: object 'lusc_nonpair_rlt' not found
P11 <- log(as.numeric(P1), 10)
## Error: object 'P1' not found
P12 <- log(as.numeric(P2), 10)
## Error: object 'P2' not found
plot(P11, P12, cex = 0.4, xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma")
## Error: object 'P11' not found
abline(v = log(0.05/length(P11), 10), lty = 2, col = 2, lwd = 4)
## Error: object 'P11' not found
abline(h = log(0.05/length(P11), 10), lty = 2, col = 3, lwd = 4)
## Error: object 'P11' not found
dev.off()
## tiff 
##    3

Figure 1C. P-value compare between pair and non-pair based on LUAD

P1 <- luad_pair_rlt$p
## Error: object 'luad_pair_rlt' not found
P2 <- luad_nonpair_rlt$p
## Error: object 'luad_nonpair_rlt' not found
P11 <- log(as.numeric(P1), 10)
## Error: object 'P1' not found
P12 <- log(as.numeric(P2), 10)
## Error: object 'P2' not found
plot(P11, P12, cex = 0.4, xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma")
## Error: object 'P11' not found
abline(v = log(0.05/length(P11), 10), lty = 2, col = 2, lwd = 4)
## Error: object 'P11' not found
abline(h = log(0.05/length(P11), 10), lty = 2, col = 3, lwd = 4)
## Error: object 'P11' not found

Figure 1D. P-value compare between pair and non-pair based on LUSC

P1 <- lusc_pair_rlt$p
## Error: object 'lusc_pair_rlt' not found
P2 <- lusc_nonpair_rlt$p
## Error: object 'lusc_nonpair_rlt' not found
P11 <- log(as.numeric(P1), 10)
## Error: object 'P1' not found
P12 <- log(as.numeric(P2), 10)
## Error: object 'P2' not found
plot(P11, P12, cex = 0.4, xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma")
## Error: object 'P11' not found
abline(v = log(0.05/length(P11), 10), lty = 2, col = 2, lwd = 4)
## Error: object 'P11' not found
abline(h = log(0.05/length(P11), 10), lty = 2, col = 3, lwd = 4)
## Error: object 'P11' not found

2.2 P-value calcualtion for pair and nonpair luad and lusc dataset based on same sample size

ncol(luad$pair)
## NULL
ncol(lusc$pair)
## NULL
min <- min(ncol(luad$pair), ncol(lusc$pair))
## Warning: no non-missing arguments to min; returning Inf
min
## [1] Inf
luad_pair_rlt <- PairPvalueCalcuateByCancer(luad$pair[, 1:min])
## Error: result would be too long a vector
lusc_pair_rlt <- PairPvalueCalcuateByCancer(lusc$pair[, 1:min])
## Error: result would be too long a vector

P1 <- luad_pair_rlt$p
## Error: object 'luad_pair_rlt' not found
P2 <- lusc_pair_rlt$p
## Error: object 'lusc_pair_rlt' not found
P11 <- log(as.numeric(P1), 10)
## Error: object 'P1' not found
P12 <- log(as.numeric(P2), 10)
## Error: object 'P2' not found

jpeg("lusc.luad.n52.pvalue.jpeg")
plot(P11, P12, cex = 0.5, xlim = c(-10, 0), ylim = c(-10, 0), xlab = "Lung Adenocarcinoma", 
    ylab = "Lung Squamous Cell Carcinoma")
## Error: object 'P11' not found
abline(v = log(0.05/length(P11), 10), lty = 2, col = 2, lwd = 2)
## Error: object 'P11' not found
abline(h = log(0.05/length(P11), 10), lty = 2, col = 3, lwd = 2)
## Error: object 'P11' not found
dev.off()
## pdf 
##   2

manhattan plot for lung adenocarcinoma and lung squamous cell carcinoma

pattern = paste("jhu-usc.edu_", "LUAD", ".*", sep = "")
print(pattern)
## [1] "jhu-usc.edu_LUAD.*"
file = list.files(pattern = pattern)
tmp <- read.table(file[1], head = T, sep = "\t", as.is = F, skip = 1)  # tmp<-read.table(file[i],head=T,sep='\t',as.is=F)
## Warning: cannot open file 'NA': No such file or directory
## Error: cannot open the connection
sortanno <- read.table("annotation450.sort.txt", head = F, sep = "\t", as.is = T)  # tmp<-read.table(file[i],head=T,sep='\t',as.is=F)
## Warning: cannot open file 'annotation450.sort.txt': No such file or
## directory
## Error: cannot open the connection

P1 <- luad_pair_rlt$p
## Error: object 'luad_pair_rlt' not found
P2 <- lusc_pair_rlt$p
## Error: object 'lusc_pair_rlt' not found
NP11 <- P1[match(sortanno[, 1], tmp[, 1])]
## Error: object 'P1' not found
NP12 <- P2[match(sortanno[, 1], tmp[, 1])]
## Error: object 'P2' not found


P1 <- as.numeric(luad_pair_rlt$p)
## Error: object 'luad_pair_rlt' not found
P2 <- as.numeric(lusc_pair_rlt$p)
## Error: object 'lusc_pair_rlt' not found
sta1 <- as.numeric(luad_pair_rlt$sta)
## Error: object 'luad_pair_rlt' not found
sta2 <- as.numeric(lusc_pair_rlt$sta)
## Error: object 'lusc_pair_rlt' not found

effectloci <- max(c(which(sortanno[, 2] == "X"), which(sortanno[, 2] == "Y"))) + 
    1
## Error: object 'sortanno' not found
region <- effectloci:nrow(sortanno)
## Error: object 'effectloci' not found
Man1 <- data.frame(SNP = sortanno[region, 1], CHR = as.character(sortanno[region, 
    2]), BP = sortanno[region, 3], P = NP11[region])
## Error: object 'sortanno' not found
Man2 <- data.frame(SNP = sortanno[region, 1], CHR = as.character(sortanno[region, 
    2]), BP = sortanno[region, 3], P = NP12[region])
## Error: object 'sortanno' not found

Man1$SNP = as.character(Man1$SNP)
## Error: object 'Man1' not found
Man1$CHR = as.numeric(as.character(Man1$CHR))
## Error: object 'Man1' not found
Man2$SNP = as.character(Man2$SNP)
## Error: object 'Man2' not found
Man2$CHR = as.numeric(as.character(Man2$CHR))
## Error: object 'Man2' not found


Man1 <- na.omit(Man1)
## Error: object 'Man1' not found
Man2 <- na.omit(Man2)
## Error: object 'Man2' not found

tiff("manhattan.tiff")
par <- par(mfrow = c(2, 1))
manhattan(Man1, main = "Manhattan Plot For Lung Adenocarcinoma", cex = 0.5, 
    cex.axis = 0.8, col = c("blue4", "orange3"), genomewideline = -log(0.01/485577, 
        10), suggestiveline = F, cex.main = 0.8)
## Error: could not find function "manhattan"
manhattan(Man2, main = "Manhattan Plot For Lung Squamous Cell Carcinoma", cex = 0.5, 
    cex.axis = 0.8, col = c("blue4", "orange3"), genomewideline = -log(0.01/485577, 
        10), suggestiveline = F, cex.main = 0.8)
## Error: could not find function "manhattan"
dev.off()
## pdf 
##   2

Pvalue = data.frame(tmp[, 3], P)
## Error: object 'tmp' not found
colnames(Pvalue) = c("GeneSymbol", colnames(P))
## Error: object 'P' not found
rownames(Pvalue) <- rownames(data)
## Error: object 'Pvalue' not found

2.3 P-value calcualtion for pair and nonpair luad and lusc dataset based on same sample size by logitic regression

ncol(luad$pair)
## NULL
ncol(lusc$pair)
## NULL
min <- min(ncol(luad$pair), ncol(lusc$pair))
## Warning: no non-missing arguments to min; returning Inf
min
## [1] Inf
luad_pair_rlt <- PairPvalueCalcuateByCancerBylogistic(luad$pair[, 1:min])
## Error: result would be too long a vector
lusc_pair_rlt <- PairPvalueCalcuateByCancerBylogistic(lusc$pair[, 1:min])
## Error: result would be too long a vector

P1 <- luad_pair_rlt$p
## Error: object 'luad_pair_rlt' not found
P2 <- lusc_pair_rlt$p
## Error: object 'lusc_pair_rlt' not found
P11 <- log(as.numeric(P1), 10)
## Error: object 'P1' not found
P12 <- log(as.numeric(P2), 10)
## Error: object 'P2' not found

jpeg("lusc.luad.n52.pvalue.jpeg")
plot(P11, P12, cex = 0.5, xlim = c(-10, 0), ylim = c(-10, 0), xlab = "Lung adenocarcinoma", 
    ylab = "Lung Squamous Cell Carcinoma")
## Error: object 'P11' not found
abline(v = log(0.05/length(P11), 10), lty = 2, col = 2, lwd = 2)
## Error: object 'P11' not found
abline(h = log(0.05/length(P11), 10), lty = 2, col = 3, lwd = 2)
## Error: object 'P11' not found
dev.off()
## pdf 
##   2
pattern = paste("jhu-usc.edu_", "LUAD", ".*", sep = "")
print(pattern)
## [1] "jhu-usc.edu_LUAD.*"
file = list.files(pattern = pattern)
tmp <- read.table(file[1], head = T, sep = "\t", as.is = F, skip = 1)  # tmp<-read.table(file[i],head=T,sep='\t',as.is=F)
## Warning: cannot open file 'NA': No such file or directory
## Error: cannot open the connection
sortanno <- read.table("annotation450.sort.txt", head = F, sep = "\t", as.is = T)  # tmp<-read.table(file[i],head=T,sep='\t',as.is=F)
## Warning: cannot open file 'annotation450.sort.txt': No such file or
## directory
## Error: cannot open the connection

NP11 <- P11[match(sortanno[, 1], tmp[, 1])]
## Error: object 'P11' not found
NP12 <- P12[match(sortanno[, 1], tmp[, 1])]
## Error: object 'P12' not found

effectloci <- max(c(which(sortanno[, 2] == "X"), which(sortanno[, 2] == "Y"))) + 
    1
## Error: object 'sortanno' not found
region <- effectloci:nrow(sortanno)
## Error: object 'effectloci' not found
Man1 <- data.frame(SNP = sortanno[region, 1], CHR = as.character(sortanno[region, 
    2]), BP = sortanno[region, 3], P = NP11[region])
## Error: object 'sortanno' not found
Man2 <- data.frame(SNP = sortanno[region, 1], CHR = as.character(sortanno[region, 
    2]), BP = sortanno[region, 3], P = NP12[region])
## Error: object 'sortanno' not found

Man1$SNP = as.character(Man1$SNP)
## Error: object 'Man1' not found
Man1$CHR = as.numeric(as.character(Man1$CHR))
## Error: object 'Man1' not found
Man1$P = 10^Man1$P
## Error: object 'Man1' not found

Man2$SNP = as.character(Man2$SNP)
## Error: object 'Man2' not found
Man2$CHR = as.numeric(as.character(Man2$CHR))
## Error: object 'Man2' not found
Man2$P = 10^Man2$P
## Error: object 'Man2' not found


Man1 <- na.omit(Man1)
## Error: object 'Man1' not found
Man2 <- na.omit(Man2)
## Error: object 'Man2' not found

tiff("manhattan.tiff")
par <- par(mfrow = c(2, 1))
manhattan(Man1, main = "Manhattan Plot", cex = 0.5, cex.axis = 0.8, col = c("blue4", 
    "orange3"), ymax = 8)
## Error: could not find function "manhattan"
manhattan(Man2, main = "Manhattan Plot", cex = 0.5, cex.axis = 0.8, col = c("blue4", 
    "orange3"), ymax = 8)
## Error: could not find function "manhattan"
dev.off()
## pdf 
##   2

Pvalue = data.frame(tmp[, 3], P)
## Error: object 'tmp' not found
colnames(Pvalue) = c("GeneSymbol", colnames(P))
## Error: object 'P' not found
rownames(Pvalue) <- rownames(data)
## Error: object 'Pvalue' not found
P1 <- as.numeric(as.character(Pvalue[, 2]))
## Error: object 'Pvalue' not found
P2 <- as.numeric(as.character(Pvalue[, 5]))
## Error: object 'Pvalue' not found
P11 <- log(as.numeric(P1), 10)
## Error: object 'P1' not found
P12 <- log(as.numeric(P2), 10)
## Error: object 'P2' not found
# plot(P11,P12,cex=0.5,xlim=c(-10,0),ylim=c(-10,0),xlab='Lung
# adenocarcinoma',ylab='Lung Squamous Cell Carcinoma')
jpeg("lusc.luad.total.pvalue.jpeg")
plot(P11, P12, cex = 0.1, xlab = "Lung adenocarcinoma", ylab = "Lung Squamous Cell Carcinoma")
## Error: object 'P11' not found
abline(v = log(0.05/length(P11), 10), lty = 2, col = 2, lwd = 4)
## Error: object 'P11' not found
abline(h = log(0.05/length(P11), 10), lty = 2, col = 3, lwd = 4)
## Error: object 'P11' not found
dev.off()
## pdf 
##   2


plot(1:10, col = 1:8)

plot of chunk unnamed-chunk-20