counts <- read.table("~/Desktop/Orbsen_24/GSE231660/GSE231660_raw_counts_GRCh38.p13_NCBI.tsv", header= T, sep="\t", row.names = "GeneID")

gene_meta <- read.table("~/Desktop/Orbsen_24/GSE231660/genes.txt", header = T, sep="\t")

gene_list <- readxl::read_xls("~/Desktop/Orbsen_24/Genes related to Efferocytosis.xls")

meta <- GEOquery::getGEO("GSE231660")
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
## Found 1 file(s)
## GSE231660_series_matrix.txt.gz
meta <- meta$GSE231660_series_matrix.txt.gz@phenoData@data

pheno <- data.frame(patient = meta$title,
                    day = meta[,36],
                    treatment = meta[,38])

pheno$patient <- gsub('.{3}$', '', pheno$patient)

rownames(pheno) <- meta$geo_accession

pheno <- pheno[which(rownames(pheno) %in% colnames(counts)), ]

genes to counts

gene_meta <- gene_meta[which(gene_meta$GeneType == "protein-coding"),]
gene_meta <- gene_meta[,c(1:2)]

count <- merge(counts, gene_meta, by.x = 0, by.y = "GeneID")
count <- tibble::column_to_rownames(count, "Symbol")
count <- count %>% dplyr::select(-c("Row.names"))
dds <- DESeqDataSetFromMatrix(count, pheno, design = ~ 1)
vst <- assay(vst(dds))
orbsen_gene_list <- list(`Efferocytosis-related genes` = gene_list$`Efferocytosis-related genes`)
gsva_results <- gsva(
  vst,
  orbsen_gene_list,
  method = "gsva",
  # Appropriate for our vst transformed data
  kcdf = "Gaussian",
  # Minimum gene set size
  min.sz = 15,
  # Maximum gene set size
  max.sz = 500,
  # Compute Gaussian-distributed scores
  mx.diff = TRUE,
  # Don't print out the progress bar
  verbose = FALSE
)
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning in .filterFeatures(expr, method): 110 genes with constant expression
## values throuhgout the samples.
## Warning in .filterFeatures(expr, method): Since argument method!="ssgsea",
## genes with constant expression values are discarded.
gsva_df <- as.data.frame(t(gsva_results))
gsva_df <- cbind(gsva_df, pheno)
colnames(gsva_df)[1] <- "Efferocytosis_genes"
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
t.test(Efferocytosis_genes ~ treatment, gsva_df)
## 
##  Welch Two Sample t-test
## 
## data:  Efferocytosis_genes by treatment
## t = -0.78365, df = 94.35, p-value = 0.4352
## alternative hypothesis: true difference in means between group ORBCEL-C and group Placebo is not equal to 0
## 95 percent confidence interval:
##  -0.14106708  0.06122273
## sample estimates:
## mean in group ORBCEL-C  mean in group Placebo 
##           -0.034064000            0.005858175
treatment_t <- aov(Efferocytosis_genes ~ treatment, gsva_df)
TukeyHSD(treatment_t)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Efferocytosis_genes ~ treatment, data = gsva_df)
## 
## $treatment
##                        diff         lwr       upr    p adj
## Placebo-ORBCEL-C 0.03992218 -0.06085702 0.1407014 0.433656
comps <- list(c("Placebo", "ORBCEL-C"))
signif_yaxis <- max(gsva_df$Efferocytosis_genes) + (max(gsva_df$Efferocytosis_genes)*0.05)
ggboxplot(gsva_df, 
          x = "treatment", 
          y="Efferocytosis_genes",
          add = c("jitter", "mean_sd")) +
          stat_compare_means(comparisons = comps, method = "t.test", label.y = signif_yaxis)

ggboxplot(gsva_df, 
          x = "treatment", 
          y = "Efferocytosis_genes", 
          facet.by = "day") + 
          stat_compare_means(comparisons = comps, method = "t.test", label.y = signif_yaxis)

plot(TukeyHSD(aov(Efferocytosis_genes ~ day:treatment, gsva_df)))

plot(TukeyHSD(aov(Efferocytosis_genes ~ day:treatment, gsva_df)), las = 2)

comps <- list(c("0", "4"), c("4", "7"), c("0", "7"))
signif_yaxis <- max(gsva_df$Efferocytosis_genes) + (max(gsva_df$Efferocytosis_genes)*0.05)
ggboxplot(gsva_df, 
          x = "day", 
          y = "Efferocytosis_genes", 
          facet.by = "treatment") + 
          stat_compare_means(comparisons = comps, method = "t.test")

TukeyHSD(aov(Efferocytosis_genes ~ treatment:day, gsva_df))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Efferocytosis_genes ~ treatment:day, data = gsva_df)
## 
## $`treatment:day`
##                              diff        lwr        upr     p adj
## Placebo:0-ORBCEL-C:0   0.13637227 -0.0878045 0.36054904 0.4898310
## ORBCEL-C:4-ORBCEL-C:0 -0.01640576 -0.2711022 0.23829066 0.9999670
## Placebo:4-ORBCEL-C:0   0.07323834 -0.1870371 0.33351379 0.9633680
## ORBCEL-C:7-ORBCEL-C:0  0.09569046 -0.1421548 0.33353571 0.8495565
## Placebo:7-ORBCEL-C:0  -0.06395477 -0.3305916 0.20268203 0.9817437
## ORBCEL-C:4-Placebo:0  -0.15277803 -0.3978985 0.09234246 0.4621116
## Placebo:4-Placebo:0   -0.06313392 -0.3140464 0.18777860 0.9774343
## ORBCEL-C:7-Placebo:0  -0.04068181 -0.2682431 0.18687951 0.9952529
## Placebo:7-Placebo:0   -0.20032704 -0.4578323 0.05717825 0.2195318
## Placebo:4-ORBCEL-C:4   0.08964410 -0.1888736 0.36816185 0.9359463
## ORBCEL-C:7-ORBCEL-C:4  0.11209621 -0.1455842 0.36977662 0.8026105
## Placebo:7-ORBCEL-C:4  -0.04754902 -0.3320205 0.23692245 0.9965414
## ORBCEL-C:7-Placebo:4   0.02245211 -0.2407441 0.28564830 0.9998675
## Placebo:7-Placebo:4   -0.13719312 -0.4266703 0.15228410 0.7393178
## Placebo:7-ORBCEL-C:7  -0.15964523 -0.4291339 0.10984339 0.5197597
plot(TukeyHSD(aov(Efferocytosis_genes ~ treatment:day, gsva_df)), las = 2)

library(pheatmap)

gene_mat <- vst[which(rownames(vst) %in% gene_list$`Efferocytosis-related genes`), ]
gene_mat = t(scale(t(gene_mat), center = T))

col <- c("forestgreen", "purple")
names(col) <- c("Placebo", "ORBCEL-C")
ann_clr <- list(treatment = col)

pheatmap(gene_mat,
         cluster_rows = F,
         cluster_cols = F,
         scale = "none",
         height = 20,
         annotation_col = pheno[,2:3],
         annotation_colors = ann_clr,
         color = hcl.colors(100, "RdBu",rev=T))