# Fill missing values (NA) using KNN imputation

# Impute missing values with k-nearest neighbours via impute::impute.knn().
#
# `impute` is distributed through Bioconductor, so a plain
# install.packages("impute") against the default CRAN repository does not find
# it, and installing on every run of a script is an unwanted side effect.
# Check for the package instead and fail early with an actionable message.
if (!requireNamespace("impute", quietly = TRUE)) {
  stop(
    "Package 'impute' is required. Install it from Bioconductor, e.g. ",
    "BiocManager::install(\"impute\").",
    call. = FALSE
  )
}
library(impute)

# Toy input: a 10 x 10 matrix of standard-normal draws in which 10 randomly
# chosen cells are blanked out.
data <- matrix(rnorm(100), ncol = 10)
data[sample(length(data), 10)] <- NA

# Fill each missing cell from the 5 nearest neighbours. impute.knn() returns a
# list; the completed matrix is its `data` element (imputed_data$data).
imputed_data <- impute.knn(data, k = 5)
print(imputed_data)

# IQR normalization (center on the median, scale by the interquartile range)

# Robust scaling example: subtract the median from every value and divide by
# the interquartile range (Q3 - Q1).
data <- data.frame(value = c(10, 15, 20, 22, 25, 30, 35, 40, 45, 100))

# Both quartiles come from a single quantile() call.
quartiles <- quantile(data$value, probs = c(0.25, 0.75))
Q1 <- quartiles[1]
Q3 <- quartiles[2]
IQR_value <- Q3 - Q1

# Distance of each value from the median, expressed in IQR units.
centered <- data$value - median(data$value)
data$IQR_normalized <- centered / IQR_value
print(data)

# Read data

# Load the expression table from a comma-separated file whose first row holds
# the column names. `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned.
full_d_sub <- read.table("Expression.csv", header = TRUE, sep = ",")
dim(full_d_sub)
## [1] 242  99

# Interactive preview of the first 10 columns. datatable() comes from the DT
# package, which this script never attaches, so the call is namespace-qualified.
DT::datatable(full_d_sub[, 1:10])

# Differential expression analysis

library(genefilter)

# Column-wise two-sample t-test of every feature against `Group`.
# Columns 1-4 are excluded from testing (presumably sample metadata such as
# `Group` -- TODO confirm against Expression.csv).
# Intended contrast per the original note: Cancer vs Non Cancer or 1 vs 0.
# NOTE(review): genefilter documents the sign of its t-test output as
# "group 1 minus group 2", where group 1 is the FIRST factor level. If `Group`
# is coded 0/1, that is mean(0) - mean(1); confirm the sign matches the
# intended contrast before interpreting "up"/"down".
fc <- colttests(as.matrix(full_d_sub[, -c(1:4)]), as.factor(full_d_sub$Group))

# Adjust p.value by BH
fc$FDR <- p.adjust(fc$p.value, method = "BH", n = length(fc$p.value))

# Column 2 of the colttests() result is the difference of group means (`dm`).
# It is a log2 fold change only if the expression values are already on a log2
# scale -- TODO confirm.
colnames(fc)[2] <- "logFC"
fc$Feature <- rownames(fc)
fc$fc_direct <- ifelse(fc$logFC > 0, "up", "down")

# head() never pads with all-NA rows when fewer than 10 rows are available,
# unlike indexing with [1:10, ]. DT is not attached by this script, so the
# call is namespace-qualified.
DT::datatable(head(fc, 10))
# Original genes N
nrow(fc)
## [1] 95

# Keep features with |logFC| > log2(2) (= 1) and FDR < 0.05.
# which() drops comparisons that evaluate to NA (e.g. a feature whose test
# produced NaN); a bare logical index would turn each NA into an all-NA row
# and inflate the count below.
fc_filter <- fc[which(abs(fc$logFC) > log2(2) & fc$FDR < 0.05), ]
DT::datatable(head(fc_filter, 10))
# N after filter
nrow(fc_filter)
## [1] 25

# Volcano plot

# ggplot(), geom_point(), ... come from ggplot2 and geom_label_repel() from
# ggrepel; neither package is attached elsewhere in this script.
library(ggplot2)
library(ggrepel)

# Build a volcano plot (logFC on x, -log10(FDR) on y) for `all_genes`, drawing
# repelled text labels only for the rows of `labelled_genes`.
#
# all_genes:      data frame with columns logFC, FDR, fc_direct and Feature;
#                 every row is drawn as a point.
# labelled_genes: data frame with the same columns; every row gets a label.
#                 Defaults to labelling all points.
# Returns the ggplot object (not printed).
make_volcano_plot <- function(all_genes, labelled_genes = all_genes) {
  # Named colours pin "down" to blue and "up" to red. With unnamed values the
  # colours are assigned in level order, so a table containing only "up" rows
  # would be drawn in the "down" colour.
  direction_colours <- c(down = "#0072B5FF", up = "#BC3C29FF")

  ggplot(all_genes, aes(logFC, -log10(FDR), fill = fc_direct)) +
    geom_point(size = 1, shape = 21, color = "black") +
    scale_fill_manual(values = direction_colours) +
    xlab(expression("log"[2] * "FC")) +
    ylab(expression("-log"[10] * "FDR")) +
    geom_label_repel(
      data = labelled_genes,
      mapping = aes(logFC, -log10(FDR), label = Feature),
      size = 2, max.overlaps = 50, force = 2
    ) +
    theme_bw()
}

# All genes
p <- make_volcano_plot(fc)
print(p)

# Filtered genes
p1 <- make_volcano_plot(fc, labelled_genes = fc_filter)
print(p1)