rm(list = ls())
################# ##############input data
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\"
dir_path_name <- list.files(pattern = ".*xlsx",dir_path,full.names = T, recursive = F)
#dir_path_name
library(openxlsx)
#getSheetNames(grep("data.xlsx",dir_path_name,value = T))
data_1 <- read.xlsx(grep("data.xlsx",dir_path_name,value = T),sheet = 1,startRow = 2)
#dim(data_1)
#View(data_1)
colnames(data_1) <- paste(colnames(data_1), as.character(data_1[1,]),sep = "_")
data_2 <- data_1[-1,]
#dim(data_2) #[1] 288 31
#colnames(data_2)
##################################
data_3 <- data_2[,c(grep("Pos\\/Neg \\(Curve Rank\\)", colnames(data_2)),30)]
colnames(data_3) <- LETTERS[1:ncol(data_3)]
#View(data_3)
#apply(data_3, 2, function(x) unique(x))
data_3[data_3 == "x"] <- NA
data_3[data_3 == "NA"] <- NA
##############################################
data_3 <- data_3[1:50,]
head(data_3,6)
## A B C D E
## 2 Negative Positive Negative Positive Positive
## 3 Negative Negative Negative Negative Negative
## 4 Negative Negative Negative Negative Negative
## 5 Negative Negative Negative Negative Negative
## 6 Negative Positive Negative Negative Negative
## 7 Negative Negative Negative Negative Negative
apply(data_3, 2, function(x) table(x))
## A B C D E
## Negative 29 32 49 42 44
## Positive 21 18 1 8 5
#colnames(data_3)
list_1 <- data.frame(combn(colnames(data_3), 2))
list_1
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## 1 A A A A B B B C C D
## 2 B C D E C D E D E E
#View(list_1)
data_list <- list()
for (i in 1:ncol(list_1)) {
#i = 1
data_4 <- data_3[, list_1[,i]]
head(data_4)
data_5 <- data.frame(table(data_4[,1], data_4[,2]))
data_5$name_1 <- colnames(data_4[1])
data_5$name_2 <- colnames(data_4[2])
#colnames(data_5)[1:2] <- colnames(data_4)
data_list[[i]] <- data_5
}
data_6 <- do.call("rbind", data_list)
head(data_6)
## Var1 Var2 Freq name_1 name_2
## 1 Negative Negative 21 A B
## 2 Positive Negative 11 A B
## 3 Negative Positive 8 A B
## 4 Positive Positive 10 A B
## 5 Negative Negative 28 A C
## 6 Positive Negative 21 A C
#dim(data_6)
#View(data_6)
data_7 <- data_6[data_6$Var1 == data_6$Var2,]
pos <- data_7[data_7$Var1 == "Positive", ]
neg <- data_7[data_7$Var1 == "Negative", ]
pos; neg
## Var1 Var2 Freq name_1 name_2
## 4 Positive Positive 10 A B
## 8 Positive Positive 0 A C
## 12 Positive Positive 6 A D
## 16 Positive Positive 3 A E
## 20 Positive Positive 1 B C
## 24 Positive Positive 6 B D
## 28 Positive Positive 5 B E
## 32 Positive Positive 0 C D
## 36 Positive Positive 1 C E
## 40 Positive Positive 3 D E
## Var1 Var2 Freq name_1 name_2
## 1 Negative Negative 21 A B
## 5 Negative Negative 28 A C
## 9 Negative Negative 27 A D
## 13 Negative Negative 26 A E
## 17 Negative Negative 32 B C
## 21 Negative Negative 30 B D
## 25 Negative Negative 32 B E
## 29 Negative Negative 41 C D
## 33 Negative Negative 44 C E
## 37 Negative Negative 39 D E
#dim(pos); dim(neg)
########################################################Symmetric matrix
###positive
v <- unique(c(pos$name_1, pos$name_2))
pos_1 <- sapply(v, function(x) {
sapply(v, function(y) {
pos[pos$name_1 %in% c(x, y) & pos$name_2 %in% c(x, y), "Freq"]
})
})
pos_1[lower.tri(pos_1, diag = T)] <- NA
###negtive
v <- unique(c(neg$name_1, neg$name_2))
neg_1 <- sapply(v, function(x) {
sapply(v, function(y) {
neg[neg$name_1 %in% c(x, y) & neg$name_2 %in% c(x, y), "Freq"]
})
})
neg_1[upper.tri(neg_1, diag = T)] <- NA
#class(neg_1)
#View(neg_1)
neg_2 <- data.frame(neg_1)
#View(neg_2)
pos_2 <- data.frame(pos_1)
#View(pos_2)
pos_2[lower.tri(pos_2)] <- neg_2[lower.tri(neg_2)]
pos_2[] <- sapply(pos_2[], as.numeric)
#neg_2[] <- sapply(neg_2[], as.numeric)
pos_1; neg_1; pos_2
## A B C D E
## A NA 10 0 6 3
## B NA NA 1 6 5
## C NA NA NA 0 1
## D NA NA NA NA 3
## E NA NA NA NA NA
## A B C D E
## A NA NA NA NA NA
## B 21 NA NA NA NA
## C 28 32 NA NA NA
## D 27 30 41 NA NA
## E 26 32 44 39 NA
## A B C D E
## A NA 10 0 6 3
## B 21 NA 1 6 5
## C 28 32 NA 0 1
## D 27 30 41 NA 3
## E 26 32 44 39 NA
write.csv(pos_2, paste0(dir_path,Sys.Date(),"-","table.csv"),row.names = T,na = "")