## Then summarise the pairwise correlations (r >= 0.8, p <= 0.05) separately for each major group.





## 加载包
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     combine, src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
## Set the default ggplot2 theme (black-and-white, "STKaiti" font family).
## The functions are namespace-qualified so this line does not depend on
## ggplot2 having been attached as a side effect of library(Hmisc).
ggplot2::theme_set(ggplot2::theme_bw(base_family = "STKaiti"))

## Read the array measurements from the Excel workbook and show a per-column
## summary of what was loaded.
ansdata <- readxl::read_excel(path = "Array_data_nobackground_normalised.xlsx")
summary(object = ansdata)
##     Factor          Factor_group         Array data      
##  Length:6400        Length:6400        Min.   :    0.76  
##  Class :character   Class :character   1st Qu.:   80.23  
##  Mode  :character   Mode  :character   Median :  143.06  
##                                        Mean   : 1487.96  
##                                        3rd Qu.:  316.36  
##                                        Max.   :59801.26  
##                                                          
##   Patient Id        Collection_Date     Illness_Day      Outcome         
##  Length:6400        Length:6400        Min.   : 3.00   Length:6400       
##  Class :character   Class :character   1st Qu.:11.00   Class :character  
##  Mode  :character   Mode  :character   Median :15.00   Mode  :character  
##                                        Mean   :17.89                     
##                                        3rd Qu.:23.00                     
##                                        Max.   :68.00                     
##                                        NA's   :400                       
##       Age            Sex            Disease Phase Sampling Categry  
##  Min.   : 6.00   Length:6400        Min.   :1.0   Length:6400       
##  1st Qu.:41.00   Class :character   1st Qu.:2.0   Class :character  
##  Median :55.00   Mode  :character   Median :2.0   Mode  :character  
##  Mean   :53.67                      Mean   :2.2                     
##  3rd Qu.:67.00                      3rd Qu.:3.0                     
##  Max.   :82.00                      Max.   :4.0                     
##  NA's   :400                        NA's   :400
## Rename the columns to short identifiers. The assignment is positional, so
## the eleven names must stay in the same order as the workbook's columns.
names(ansdata) <- c(
  "factor", "factorgroup", "arraydata", "paintID", "collectiondate",
  "illnessday", "outcom", "age", "sex", "diseaPH", "SampleCategry"
)
summary(object = ansdata)
##     factor          factorgroup          arraydata       
##  Length:6400        Length:6400        Min.   :    0.76  
##  Class :character   Class :character   1st Qu.:   80.23  
##  Mode  :character   Mode  :character   Median :  143.06  
##                                        Mean   : 1487.96  
##                                        3rd Qu.:  316.36  
##                                        Max.   :59801.26  
##                                                          
##    paintID          collectiondate       illnessday       outcom         
##  Length:6400        Length:6400        Min.   : 3.00   Length:6400       
##  Class :character   Class :character   1st Qu.:11.00   Class :character  
##  Mode  :character   Mode  :character   Median :15.00   Mode  :character  
##                                        Mean   :17.89                     
##                                        3rd Qu.:23.00                     
##                                        Max.   :68.00                     
##                                        NA's   :400                       
##       age            sex               diseaPH    SampleCategry     
##  Min.   : 6.00   Length:6400        Min.   :1.0   Length:6400       
##  1st Qu.:41.00   Class :character   1st Qu.:2.0   Class :character  
##  Median :55.00   Mode  :character   Median :2.0   Mode  :character  
##  Mean   :53.67                      Mean   :2.2                     
##  3rd Qu.:67.00                      3rd Qu.:3.0                     
##  Max.   :82.00                      Max.   :4.0                     
##  NA's   :400                        NA's   :400
## Analyse the data

## Group the measurements by factor, factor group, sex and outcome.
## NOTE(review): no summarise step in the visible script consumes this
## grouping - confirm whether it is still needed.
usedata2 <- group_by(ansdata, factor, factorgroup, sex, outcom)

## Build the "<outcome>-<sampling category>" key used to select each subgroup,
## then show how many rows fall into each key.
usedata2[["outSC"]] <- with(usedata2, paste(outcom, SampleCategry, sep = "-"))
table(usedata2[["outSC"]])
## 
##  Fatal-All other      Fatal-First       Fatal-Last    Healthy-First 
##              640              480              480              400 
##   Mild-All other       Mild-First Severe-All other     Severe-First 
##              480              560             1600              880 
##      Severe-Last 
##              880
# ## ----------- This section is not essential; it is superseded by the function defined below -----------
# ## "Healthy-First"数据表中细胞自建的相关性
# ## 然后把各大组的pairwusecorrelation(r>=0.8,p<=0.05)分别总结出来。
# ## 准备数据
# corrdata <- usedata2[usedata2$outSC == "Healthy-First",c(1,2,3,12)]
# corrdata2 <- corrdata[c("factor","arraydata")]
# corrdata2$factor <- factor(corrdata2$factor)
# 
# ## 查看因子的个数
# table(corrdata2$factor)
# n <- length(table(corrdata2$factor))
# n
# ## 变换数据
# corrdata3 <- as.data.frame(split(corrdata2$arraydata,corrdata2$factor))
# # aa <- cor(corrdata3,method = "pearson")
# # cor.test(corrdata3)
# ## 计算相关系数
# sper <- rcorr(as.matrix(corrdata3),type = "pearson")
# ## 相关系数矩阵
# sper$r
# ## 对应的p值矩阵
# sper$P
# 
# ## 保存数据
# write.csv(sper$r,"相关系数.csv")
# write.csv(sper$P,"相关系数P值.csv")
# 
# ## ------------------------------------------------------------------------
# ## 相关系数矩阵
# rellation <- as.data.frame(sper$r)
# rellp <- as.data.frame(sper$P)
# ##  找到每个因子的PR值
# ## 生成相应的基因组合
# hang <- matrix(rownames(rellation), nrow = n, ncol = n,byrow = FALSE)
# lie <- matrix(colnames(rellation),nrow = n,ncol = n,byrow = TRUE)
# ## 对相应的数据取下三角形的数值
# lowrel <- rellation[lower.tri(rellation)]
# lowp <- rellp[lower.tri(rellp)]
# lowhang <- hang[lower.tri(hang)]
# lowlie <- lie[lower.tri(lie)]
# ## 生成数据表
# healthdata <- data.frame(data = matrix(nrow = length(as.vector(lowhang)),ncol = 5))
# names(healthdata) = c("factor1","factor2","relation","relP","factorgroup")
# 
# healthdata$factor1 <- as.vector(lowhang)
# healthdata$factor2 <- as.vector(lowlie)
# healthdata$relation <- as.vector(lowrel)
# healthdata$relP <- as.vector(lowp)
# healthdata$factorgroup <- rep("Healthy-First",dim(healthdata)[1])
# ## 保存healthdata
# # write.csv(healthdata,"health的结果.csv",row.names = FALSE)
# 
# 
# ##  找出符合要求的数据
# healthfuhe <- healthdata[which((healthdata$relation >=0.8 | healthdata$relation <=-0.8) &
#                                  healthdata$relation !=1 & healthdata$relP <= 0.05),]
# 
# 
# # write.csv(healthfuhe,"health符合要求的数据.csv",row.names = FALSE)
# ## -----------------------------------------------------------------------
# ## ----------- End of the non-essential section; use the function defined below instead -----------
## Compute all pairwise Pearson correlations between factors and extract the
## strong, significant pairs.
##
## Arguments:
##   data     - table whose first column holds the factor label of each
##              measurement and whose second column holds the numeric value.
##              Values are paired across factors by their order of appearance
##              within each factor, so every factor must contribute the same
##              number of rows.
##   minre1   - a pair is kept when its coefficient is >= minre1 ...
##   minre2   - ... or when its coefficient is <= minre2.
##   maxp     - largest p-value a kept pair may have.
##   dataname - label written to the `factorgroup` column of the results.
##
## Returns a list of two data frames with the columns factor1, factor2,
## relation (correlation coefficient), relP (p-value) and factorgroup:
##   reldata    - one row per factor pair (lower triangle of the matrix);
##   usereldata - the rows of reldata that pass the thresholds.
##
## Factor labels are reported exactly as they appear in `data`; they are not
## rewritten into syntactic R names (e.g. "IL-6" stays "IL-6").
use5data <- function(data,minre1 = 0.8,minre2 = -0.8,maxp = 0.05,dataname = "Healthy-First"){
  if (length(data) < 2L) {
    stop("`data` must have a factor column and a value column", call. = FALSE)
  }

  ## One vector of measurements per factor, in order of appearance.
  groups <- split(data[[2]], data[[1]])
  if (length(groups) == 0L) {
    stop("`data` contains no measurements", call. = FALSE)
  }
  if (length(unique(lengths(groups))) > 1L) {
    stop("every factor must have the same number of measurements", call. = FALSE)
  }

  ## Observations x factors matrix; cbind() keeps the labels as column names.
  wide <- do.call(cbind, groups)

  ## Correlation coefficients (r) and their p-values (P). Called through the
  ## namespace so the function does not attach Hmisc as a side effect.
  sper <- Hmisc::rcorr(wide, type = "pearson")
  r_mat <- sper$r
  p_mat <- sper$P

  ## Each pair appears once: keep the strict lower triangle (no diagonal).
  low <- lower.tri(r_mat)
  healthdata <- data.frame(
    factor1 = rownames(r_mat)[row(r_mat)[low]],
    factor2 = colnames(r_mat)[col(r_mat)[low]],
    relation = r_mat[low],
    relP = p_mat[low],
    factorgroup = rep(dataname, sum(low)),
    stringsAsFactors = FALSE
  )

  ## Strong (positive or negative) and significant pairs. Pairs whose
  ## coefficient is exactly 1 are dropped, as in the original implementation;
  ## which() also drops rows where the comparison is NA.
  strong <- healthdata$relation >= minre1 | healthdata$relation <= minre2
  keep <- which(strong & healthdata$relation != 1 & healthdata$relP <= maxp)
  healthfuhe <- healthdata[keep, ]

  list(reldata = healthdata, usereldata = healthfuhe)
}
## Pairwise correlations between factors, computed separately for every
## outcome / sampling-category group. For each group the script writes two CSV
## files: all factor pairs, and only the pairs that pass the default
## thresholds of use5data().

## Number of rows available in each group.
table(usedata2$outSC)

## Groups are processed in this order. The output files are named after the
## group, except "Healthy-First", whose files use the prefix "health".
group_names <- c("Healthy-First", "Fatal-All other", "Fatal-First",
                 "Fatal-Last", "Mild-All other", "Mild-First",
                 "Severe-All other", "Severe-First", "Severe-Last")

for (group_name in group_names) {
  file_prefix <- if (group_name == "Healthy-First") "health" else group_name

  ## Prepare the data: rows of this group, then the two columns use5data()
  ## needs (factor label first, measurement second).
  corrdata <- usedata2[usedata2$outSC == group_name,
                       c("factor", "factorgroup", "arraydata", "outSC")]
  if (nrow(corrdata) == 0L) {
    stop("no rows found for group '", group_name, "'", call. = FALSE)
  }
  corrdata2 <- corrdata[c("factor", "arraydata")]
  corrdata2$factor <- factor(corrdata2$factor)

  ## Compute the correlations for this group.
  healF <- use5data(data = corrdata2, dataname = group_name)

  ## All factor pairs.
  write.csv(healF$reldata, paste0(file_prefix, "的结果.csv"), row.names = FALSE)

  ## Only the pairs that meet the thresholds.
  write.csv(healF$usereldata, paste0(file_prefix, "符合要求的数据.csv"),
            row.names = FALSE)
}