rm(list = ls())
################# ##############input data 
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\"
dir_path_name <- list.files(pattern = ".*xlsx",dir_path,full.names = T, recursive = F)
#dir_path_name
library(openxlsx)
#getSheetNames(grep("data.xlsx",dir_path_name,value = T))
data_1 <- read.xlsx(grep("data.xlsx",dir_path_name,value = T),sheet = 1,startRow = 2)
#dim(data_1)
#View(data_1)
colnames(data_1) <- paste(colnames(data_1), as.character(data_1[1,]),sep = "_")
data_2 <- data_1[-1,]
#dim(data_2) #[1] 288  31
#colnames(data_2)
##################################
data_3 <- data_2[,c(grep("Pos\\/Neg \\(Curve Rank\\)", colnames(data_2)),30)]
colnames(data_3) <- LETTERS[1:ncol(data_3)]
#View(data_3)
#apply(data_3, 2, function(x) unique(x))
data_3[data_3 == "x"] <- NA
data_3[data_3 == "NA"] <- NA
##############################################
data_3 <- data_3[1:50,]
head(data_3,6)
##          A        B        C        D        E
## 2 Negative Positive Negative Positive Positive
## 3 Negative Negative Negative Negative Negative
## 4 Negative Negative Negative Negative Negative
## 5 Negative Negative Negative Negative Negative
## 6 Negative Positive Negative Negative Negative
## 7 Negative Negative Negative Negative Negative
apply(data_3, 2, function(x) table(x))
##           A  B  C  D  E
## Negative 29 32 49 42 44
## Positive 21 18  1  8  5
#colnames(data_3)
list_1 <- data.frame(combn(colnames(data_3), 2))
list_1
##   X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## 1  A  A  A  A  B  B  B  C  C   D
## 2  B  C  D  E  C  D  E  D  E   E
#View(list_1)
data_list <- list()
for (i in 1:ncol(list_1)) {
  #i = 1
  data_4 <- data_3[, list_1[,i]]
  head(data_4)
  data_5 <- data.frame(table(data_4[,1], data_4[,2]))
  data_5$name_1 <- colnames(data_4[1])
  data_5$name_2 <- colnames(data_4[2])
  #colnames(data_5)[1:2] <- colnames(data_4)
  data_list[[i]] <- data_5
}
data_6 <- do.call("rbind", data_list)
head(data_6)
##       Var1     Var2 Freq name_1 name_2
## 1 Negative Negative   21      A      B
## 2 Positive Negative   11      A      B
## 3 Negative Positive    8      A      B
## 4 Positive Positive   10      A      B
## 5 Negative Negative   28      A      C
## 6 Positive Negative   21      A      C
#dim(data_6)
#View(data_6)
data_7 <- data_6[data_6$Var1 == data_6$Var2,]
pos <- data_7[data_7$Var1 == "Positive", ]
neg <- data_7[data_7$Var1 == "Negative", ]
pos; neg
##        Var1     Var2 Freq name_1 name_2
## 4  Positive Positive   10      A      B
## 8  Positive Positive    0      A      C
## 12 Positive Positive    6      A      D
## 16 Positive Positive    3      A      E
## 20 Positive Positive    1      B      C
## 24 Positive Positive    6      B      D
## 28 Positive Positive    5      B      E
## 32 Positive Positive    0      C      D
## 36 Positive Positive    1      C      E
## 40 Positive Positive    3      D      E
##        Var1     Var2 Freq name_1 name_2
## 1  Negative Negative   21      A      B
## 5  Negative Negative   28      A      C
## 9  Negative Negative   27      A      D
## 13 Negative Negative   26      A      E
## 17 Negative Negative   32      B      C
## 21 Negative Negative   30      B      D
## 25 Negative Negative   32      B      E
## 29 Negative Negative   41      C      D
## 33 Negative Negative   44      C      E
## 37 Negative Negative   39      D      E
#dim(pos); dim(neg)
########################################################Symmetric matrix
###positive
v <- unique(c(pos$name_1, pos$name_2))
pos_1 <- sapply(v, function(x) {
  sapply(v, function(y) {
    pos[pos$name_1 %in% c(x, y) & pos$name_2 %in% c(x, y), "Freq"]
  })
})

pos_1[lower.tri(pos_1, diag = T)] <- NA
###negtive
v <- unique(c(neg$name_1, neg$name_2))
neg_1 <- sapply(v, function(x) {
  sapply(v, function(y) {
    neg[neg$name_1 %in% c(x, y) & neg$name_2 %in% c(x, y), "Freq"]
  })
})
neg_1[upper.tri(neg_1, diag = T)] <- NA
#class(neg_1)
#View(neg_1)
neg_2 <- data.frame(neg_1)
#View(neg_2)
pos_2 <- data.frame(pos_1)
#View(pos_2)
pos_2[lower.tri(pos_2)] <- neg_2[lower.tri(neg_2)]
pos_2[] <- sapply(pos_2[], as.numeric)
#neg_2[] <- sapply(neg_2[], as.numeric)
pos_1; neg_1; pos_2
##   A  B  C  D  E 
## A NA 10 0  6  3 
## B NA NA 1  6  5 
## C NA NA NA 0  1 
## D NA NA NA NA 3 
## E NA NA NA NA NA
##   A  B  C  D  E 
## A NA NA NA NA NA
## B 21 NA NA NA NA
## C 28 32 NA NA NA
## D 27 30 41 NA NA
## E 26 32 44 39 NA
##    A  B  C  D  E
## A NA 10  0  6  3
## B 21 NA  1  6  5
## C 28 32 NA  0  1
## D 27 30 41 NA  3
## E 26 32 44 39 NA
write.csv(pos_2, paste0(dir_path,Sys.Date(),"-","table.csv"),row.names = T,na = "")