knitr::opts_chunk$set(echo = TRUE)
dds <- read_csv("Count Table RNA Seq Data for 6703 presentation.csv")
## Rows: 8232 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): id, Predicted annotation/function, Predicted ortholog
## dbl (9): NBF1, NBF2, NBF3, BF3hr1, BF3hr2, BF3hr3, BF24hr1, BF24hr3, BF24hr4
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# I made a csv from the excel file because idk what an rds file is or how to read it 

raw <- read.csv("Count Table RNA Seq Data for 6703 presentation.csv",
                header = TRUE,
                stringsAsFactors = FALSE)
# need to get just the columns we care about 
counts <- raw[, c("NBF1","NBF2","NBF3",
                  "BF3hr1","BF3hr2","BF3hr3",
                  "BF24hr1","BF24hr3","BF24hr4")]


#but we need the gene IDs too 
rownames(counts) <- raw$id
# make a matrix and make the counts be numbers 
#the numbers themnselves are the number of sequencing reads mapped to that gene 
#I THINK higher number = more RNA
counts <- as.matrix(counts)
mode(counts) <- "numeric"

head(counts)
##                       NBF1 NBF2 NBF3 BF3hr1 BF3hr2 BF3hr3 BF24hr1 BF24hr3
## Locus10002v1rpkm9.17    51   50   50    298    191    200     128      93
## Locus10003v1rpkm9.17    31   77   78     15     31     24      45      38
## Locus1000v1rpkm136.27 3044 4470 3098   1289   1344   1276    2222    1897
## Locus1000v2rpkm50.24   866 1076  757    470    643    510     393     376
## Locus10016v1rpkm9.16    75  133  115     38     58     51      50      36
## Locus10018v1rpkm9.16    44   73   47     65     53     34     108      69
##                       BF24hr4
## Locus10002v1rpkm9.17      151
## Locus10003v1rpkm9.17       25
## Locus1000v1rpkm136.27    2371
## Locus1000v2rpkm50.24      509
## Locus10016v1rpkm9.16       56
## Locus10018v1rpkm9.16       80
condition <- factor(c(rep("NBF",3),
                      rep("BF3hr",3),
                      rep("BF24hr",3)))

coldata <- data.frame(
  row.names = colnames(counts),
  condition = condition
)
# Use DESeq2 like in lab 

dds <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = coldata,
  design = ~ condition
)
## converting counts to integer mode
#normalized to account for differences in library size
dds_vst1 <- varianceStabilizingTransformation(dds)

PCA Plot

#plot 
plotPCA(dds_vst1, intgroup = "condition", ntop = 500)
## using ntop=500 top features by variance

#distance along X axis here counts double bc it explains more variance 
#make pretty
pca_df <- plotPCA(
  dds_vst1,
  ntop = 500,
  intgroup = c("condition"),
  returnData = TRUE
)
## using ntop=500 top features by variance
pct_var <- round(100 * attr(pca_df, "percentVar"), 1)
pct_var
## [1] 49.4 28.2
ggplot(pca_df) +
  aes(x = PC1, y = PC2, color = condition) +
  geom_point(size = 10) +
  scale_color_brewer(palette = "Dark2", name = "Feeding Treatment") +
  theme_bw(base_size = 26) +
  labs(
    x = paste0("PC1 (", pct_var[1], "%)"),
    y = paste0("PC2 (", pct_var[2], "%)")
  )

Differential expression

#Differentional expression 
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing

Liz: NBF vs. BF3

res_NBF.BF3 <- results(dds, contrast = c("condition", "NBF", "BF3hr"))
head(res_NBF.BF3)
## log2 fold change (MLE): condition NBF vs BF3hr 
## Wald test p-value: condition NBF vs BF3hr 
## DataFrame with 6 rows and 6 columns
##                        baseMean log2FoldChange     lfcSE       stat      pvalue
##                       <numeric>      <numeric> <numeric>  <numeric>   <numeric>
## Locus10002v1rpkm9.17   155.2077      -3.042106 0.2926413 -10.395343 2.60348e-25
## Locus10003v1rpkm9.17    37.9684       0.556711 0.4656658   1.195517 2.31885e-01
## Locus1000v1rpkm136.27 2187.7091       0.569815 0.0922209   6.178813 6.45854e-10
## Locus1000v2rpkm50.24   590.7223      -0.121061 0.1536421  -0.787939 4.30732e-01
## Locus10016v1rpkm9.16    62.6007       0.277192 0.2866525   0.966997 3.33546e-01
## Locus10018v1rpkm9.16    67.4962      -0.780650 0.3325833  -2.347230 1.89136e-02
##                              padj
##                         <numeric>
## Locus10002v1rpkm9.17  1.96121e-23
## Locus10003v1rpkm9.17  3.59586e-01
## Locus1000v1rpkm136.27 1.04455e-08
## Locus1000v2rpkm50.24  5.66604e-01
## Locus10016v1rpkm9.16  4.71060e-01
## Locus10018v1rpkm9.16  4.99676e-02
res_NBF.BF3
## log2 fold change (MLE): condition NBF vs BF3hr 
## Wald test p-value: condition NBF vs BF3hr 
## DataFrame with 8232 rows and 6 columns
##                          baseMean log2FoldChange     lfcSE       stat
##                         <numeric>      <numeric> <numeric>  <numeric>
## Locus10002v1rpkm9.17     155.2077      -3.042106 0.2926413 -10.395343
## Locus10003v1rpkm9.17      37.9684       0.556711 0.4656658   1.195517
## Locus1000v1rpkm136.27   2187.7091       0.569815 0.0922209   6.178813
## Locus1000v2rpkm50.24     590.7223      -0.121061 0.1536421  -0.787939
## Locus10016v1rpkm9.16      62.6007       0.277192 0.2866525   0.966997
## ...                           ...            ...       ...        ...
## Locus998v1rpkm136.49    2767.9947       0.349953  0.146924   2.381864
## Locus999v1rpkm136.41    1313.6537       1.667015  0.180132   9.254418
## Locus999v2rpkm3.78_PRE    12.1084       0.869733  0.881698   0.986429
## Locus99v1rpkm1550.33    6455.4607      -1.006157  0.300965  -3.343101
## Locus9v1rpkm6429.19    23308.1282      -0.175770  0.170318  -1.032008
##                             pvalue        padj
##                          <numeric>   <numeric>
## Locus10002v1rpkm9.17   2.60348e-25 1.96121e-23
## Locus10003v1rpkm9.17   2.31885e-01 3.59586e-01
## Locus1000v1rpkm136.27  6.45854e-10 1.04455e-08
## Locus1000v2rpkm50.24   4.30732e-01 5.66604e-01
## Locus10016v1rpkm9.16   3.33546e-01 4.71060e-01
## ...                            ...         ...
## Locus998v1rpkm136.49   1.72253e-02 4.62514e-02
## Locus999v1rpkm136.41   2.15400e-20 1.17910e-18
## Locus999v2rpkm3.78_PRE 3.23922e-01 4.60160e-01
## Locus99v1rpkm1550.33   8.28479e-04 3.65733e-03
## Locus9v1rpkm6429.19    3.02068e-01 4.37671e-01
sum(res_NBF.BF3$padj < 0.05, na.rm = TRUE)
## [1] 3108
#sum = 3108 --> number of differential expressed genes 
#1 --> filter by values with padj < 0.005, take those, match with orthologs, run David's analysis? 
#2 --> merge this with raw data based on gene ID to get back to orthologs 

Emily: NBF vs. BF24

res_NBF.BF24 <- results(dds, contrast = c("condition", "NBF", "BF24hr"))
head(res_NBF.BF24)
## log2 fold change (MLE): condition NBF vs BF24hr 
## Wald test p-value: condition NBF vs BF24hr 
## DataFrame with 6 rows and 6 columns
##                        baseMean log2FoldChange     lfcSE      stat      pvalue
##                       <numeric>      <numeric> <numeric> <numeric>   <numeric>
## Locus10002v1rpkm9.17   155.2077      -2.138095 0.2972055 -7.193996 6.29221e-13
## Locus10003v1rpkm9.17    37.9684      -0.116472 0.4538049 -0.256657 7.97443e-01
## Locus1000v1rpkm136.27 2187.7091      -0.173208 0.0910788 -1.901733 5.72060e-02
## Locus1000v2rpkm50.24   590.7223       0.208926 0.1547960  1.349685 1.77117e-01
## Locus10016v1rpkm9.16    62.6007       0.315576 0.2876792  1.096972 2.72654e-01
## Locus10018v1rpkm9.16    67.4962      -1.539807 0.3243016 -4.748069 2.05368e-06
##                              padj
##                         <numeric>
## Locus10002v1rpkm9.17  1.81920e-11
## Locus10003v1rpkm9.17  8.74004e-01
## Locus1000v1rpkm136.27 1.37104e-01
## Locus1000v2rpkm50.24  3.15877e-01
## Locus10016v1rpkm9.16  4.29200e-01
## Locus10018v1rpkm9.16  2.15448e-05
res_NBF.BF24
## log2 fold change (MLE): condition NBF vs BF24hr 
## Wald test p-value: condition NBF vs BF24hr 
## DataFrame with 8232 rows and 6 columns
##                          baseMean log2FoldChange     lfcSE       stat
##                         <numeric>      <numeric> <numeric>  <numeric>
## Locus10002v1rpkm9.17     155.2077      -2.138095 0.2972055  -7.193996
## Locus10003v1rpkm9.17      37.9684      -0.116472 0.4538049  -0.256657
## Locus1000v1rpkm136.27   2187.7091      -0.173208 0.0910788  -1.901733
## Locus1000v2rpkm50.24     590.7223       0.208926 0.1547960   1.349685
## Locus10016v1rpkm9.16      62.6007       0.315576 0.2876792   1.096972
## ...                           ...            ...       ...        ...
## Locus998v1rpkm136.49    2767.9947      0.4417545  0.147016  3.0048104
## Locus999v1rpkm136.41    1313.6537      0.4863642  0.178014  2.7321671
## Locus999v2rpkm3.78_PRE    12.1084     -0.0433445  0.852601 -0.0508379
## Locus99v1rpkm1550.33    6455.4607     -0.8703339  0.300986 -2.8916134
## Locus9v1rpkm6429.19    23308.1282      0.1873659  0.170347  1.0999069
##                             pvalue        padj
##                          <numeric>   <numeric>
## Locus10002v1rpkm9.17   6.29221e-13 1.81920e-11
## Locus10003v1rpkm9.17   7.97443e-01 8.74004e-01
## Locus1000v1rpkm136.27  5.72060e-02 1.37104e-01
## Locus1000v2rpkm50.24   1.77117e-01 3.15877e-01
## Locus10016v1rpkm9.16   2.72654e-01 4.29200e-01
## ...                            ...         ...
## Locus998v1rpkm136.49    0.00265746   0.0114183
## Locus999v1rpkm136.41    0.00629192   0.0234904
## Locus999v2rpkm3.78_PRE  0.95945470   0.9784980
## Locus99v1rpkm1550.33    0.00383269   0.0155511
## Locus9v1rpkm6429.19     0.27137267   0.4276682
sum(res_NBF.BF24$padj < 0.05, na.rm = TRUE)
## [1] 2613
# First convert the results table into a regular data frame:
as.data.frame(res_NBF.BF24) |>
  # Then, only select the rows/genes that are significant:
  filter(padj < 0.05) |>
  # If you run count() on a logical test, you get the nrs. that are FALSE v. TRUE
  dplyr::count(log2FoldChange > 0)
##   log2FoldChange > 0    n
## 1              FALSE 1439
## 2               TRUE 1174
# Down-regulated (NMBF < BF3):
sum(res_NBF.BF24$log2FoldChange < -1 & res_NBF.BF24$padj < 0.05, na.rm = TRUE)
## [1] 517
# Up-regulated (NBF > BF3):
sum(res_NBF.BF24$log2FoldChange > 1 & res_NBF.BF24$padj < 0.05, na.rm = TRUE)
## [1] 459
EnhancedVolcano(
  toptable = res_NBF.BF24,      
  title = "Non-blood-fed vs. Bloodfed at 24hr dpi",
  x = "log2FoldChange",     
  y = "padj",             
  lab = rownames(res_NBF.BF24), 
  labSize = 4,               # Now we will show the gene labels
  pCutoff = 10e-10,          # Modify the p-value cut-off
  subtitle = NULL,           # I'll also remove the silly subtitle
  caption = NULL,            # ... and the caption
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the EnhancedVolcano package.
##   Please report the issue to the authors.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the EnhancedVolcano package.
##   Please report the issue to the authors.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

range(res_NBF.BF24$log2FoldChange, na.rm = TRUE)
## [1] -9.282035  7.767340
x <- filter(as.data.frame(res_NBF.BF24$log2FoldChange > 40, na.rm = TRUE))

as.data.frame(res_NBF.BF24) |> filter(log2FoldChange > 14)
## [1] baseMean       log2FoldChange lfcSE          stat           pvalue        
## [6] padj          
## <0 rows> (or 0-length row.names)
# Convert the results table into a regular dataframe and sort by adjusted p-value:
top_df <- data.frame(res_NBF.BF24) |> arrange(padj)

# Gene IDs are the row names: extract the first row name to get the gene of interest
top1 <- rownames(top_df)[1]
top1
## [1] "Locus569v2rpkm130.88"
# Like with the PlotPCA function, 'intgroup' specifies which variable to use for grouping:
plotCounts(dds, gene = top1, intgroup = "condition")

# Use plotCounts with returnData = TRUE just to get a single-gene count table
# that is suitable for plotting:
focal_gene_counts <- plotCounts(
  dds,
  gene = top1,
  intgroup = c("condition"),
  returnData = TRUE
)

# Take a look at the resulting dataframe:
head(focal_gene_counts)
##           count condition
## NBF1   1821.229       NBF
## NBF2   1593.046       NBF
## NBF3   1921.055       NBF
## BF3hr1 1634.110     BF3hr
## BF3hr2 1637.078     BF3hr
## BF3hr3 1522.305     BF3hr
focal_gene_counts |>
  ggplot(aes(x = condition, y = count, fill = condition)) +
  geom_boxplot(alpha = 0.5, outlier.shape = NA) +
  geom_point(size = 4, shape = 21) +
  theme_bw() +
  labs(
    x = "Feeding Treatment",
    y = "Count"
  ) +
  theme(
    legend.position = "none",
    # Make all text bigger:
    axis.title = element_text(size = 30),
    axis.text = element_text(size = 30),
    plot.title = element_text(size = 30, face = "bold"),
    legend.text = element_text(size = 30),
    legend.title = element_text(size = 30)
  )

Mohamad: BF3 vs. BF24

res_BF3.BF24 <- results(dds, contrast = c("condition", "BF3hr", "BF24hr"))
head(res_BF3.BF24)
## log2 fold change (MLE): condition BF3hr vs BF24hr 
## Wald test p-value: condition BF3hr vs BF24hr 
## DataFrame with 6 rows and 6 columns
##                        baseMean log2FoldChange     lfcSE      stat      pvalue
##                       <numeric>      <numeric> <numeric> <numeric>   <numeric>
## Locus10002v1rpkm9.17   155.2077      0.9040112  0.278473   3.24632 1.16907e-03
## Locus10003v1rpkm9.17    37.9684     -0.6731836  0.473807  -1.42080 1.55376e-01
## Locus1000v1rpkm136.27 2187.7091     -0.7430229  0.092877  -8.00007 1.24345e-15
## Locus1000v2rpkm50.24   590.7223      0.3299864  0.156404   2.10983 3.48729e-02
## Locus10016v1rpkm9.16    62.6007      0.0383842  0.300676   0.12766 8.98418e-01
## Locus10018v1rpkm9.16    67.4962     -0.7591571  0.325100  -2.33515 1.95357e-02
##                              padj
##                         <numeric>
## Locus10002v1rpkm9.17  4.04519e-03
## Locus10003v1rpkm9.17  2.56957e-01
## Locus1000v1rpkm136.27 2.67978e-14
## Locus1000v2rpkm50.24  7.56715e-02
## Locus10016v1rpkm9.16  9.31546e-01
## Locus10018v1rpkm9.16  4.70265e-02
res_BF3.BF24
## log2 fold change (MLE): condition BF3hr vs BF24hr 
## Wald test p-value: condition BF3hr vs BF24hr 
## DataFrame with 8232 rows and 6 columns
##                          baseMean log2FoldChange     lfcSE      stat
##                         <numeric>      <numeric> <numeric> <numeric>
## Locus10002v1rpkm9.17     155.2077      0.9040112  0.278473   3.24632
## Locus10003v1rpkm9.17      37.9684     -0.6731836  0.473807  -1.42080
## Locus1000v1rpkm136.27   2187.7091     -0.7430229  0.092877  -8.00007
## Locus1000v2rpkm50.24     590.7223      0.3299864  0.156404   2.10983
## Locus10016v1rpkm9.16      62.6007      0.0383842  0.300676   0.12766
## ...                           ...            ...       ...       ...
## Locus998v1rpkm136.49    2767.9947      0.0918015  0.147632  0.621825
## Locus999v1rpkm136.41    1313.6537     -1.1806512  0.181163 -6.517064
## Locus999v2rpkm3.78_PRE    12.1084     -0.9130771  0.895973 -1.019090
## Locus99v1rpkm1550.33    6455.4607      0.1358235  0.300963  0.451297
## Locus9v1rpkm6429.19    23308.1282      0.3631358  0.170381  2.131312
##                             pvalue        padj
##                          <numeric>   <numeric>
## Locus10002v1rpkm9.17   1.16907e-03 4.04519e-03
## Locus10003v1rpkm9.17   1.55376e-01 2.56957e-01
## Locus1000v1rpkm136.27  1.24345e-15 2.67978e-14
## Locus1000v2rpkm50.24   3.48729e-02 7.56715e-02
## Locus10016v1rpkm9.16   8.98418e-01 9.31546e-01
## ...                            ...         ...
## Locus998v1rpkm136.49   5.34057e-01 6.55379e-01
## Locus999v1rpkm136.41   7.16967e-11 8.60675e-10
## Locus999v2rpkm3.78_PRE 3.08160e-01 4.36485e-01
## Locus99v1rpkm1550.33   6.51776e-01 7.50909e-01
## Locus9v1rpkm6429.19    3.30634e-02 7.23763e-02
sum(res_BF3.BF24$padj < 0.05, na.rm = TRUE)
## [1] 3456

Heat Maps

Emily NBF vs. BF24

meta <- data.frame(colData(dds))

View(meta)

norm_mat <- assay(dds_vst1)
top25_DE <- rownames(top_df)[1:25]
norm_mat_sel <- norm_mat[match(top25_DE, rownames(norm_mat)), ]
meta_sort <- meta |> arrange(condition)
# Sort the columns in the normalized count matrix to match the metadata
norm_mat_sel <- norm_mat_sel[, rownames(meta_sort)]


pheatmap(
  norm_mat_sel,
  annotation_col = meta_sort,  # Add the metadata
  cluster_cols = FALSE,        # Don't cluster samples (=columns, cols)
  show_rownames = FALSE,       # Don't show gene names
  scale = "row",               # Perform z-scaling for each gene
)

top25_hi <- names(sort(rowMeans(norm_mat), decreasing = TRUE)[1:25])
# In the normalized count matrix, select only the genes of interest
norm_mat_sel <- norm_mat[match(top25_hi, rownames(norm_mat)), ]

# Sort the metadata
meta_sort <- meta |>
  arrange(condition) |>
  select(condition)

# Create the heatmap
pheatmap(
  norm_mat_sel,
  annotation_col = meta_sort,
  cluster_cols = FALSE,
  show_rownames = FALSE,
  scale = "row"
)

---
title: "Final Data Analysis"
author: "Emily Runnion"
date: "2026-02-26"
output:
  html_document:
    toc: true
    toc_depth: 4
    number_sections: false
    toc_float: true
    theme: journal
    code_download: true
---

```{r setup, include=TRUE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r load libraries, include=FALSE}

library(DESeq2)    # For differential expression analysis
library(EnhancedVolcano) # For creating volcano plots
library(tidyverse) 
library(ggplot2)
library(pheatmap)
```


```{r}
dds <- read_csv("Count Table RNA Seq Data for 6703 presentation.csv")
# I made a csv from the excel file because idk what an rds file is or how to read it 

raw <- read.csv("Count Table RNA Seq Data for 6703 presentation.csv",
                header = TRUE,
                stringsAsFactors = FALSE)
```

```{r}
# need to get just the columns we care about 
counts <- raw[, c("NBF1","NBF2","NBF3",
                  "BF3hr1","BF3hr2","BF3hr3",
                  "BF24hr1","BF24hr3","BF24hr4")]


#but we need the gene IDs too 
rownames(counts) <- raw$id

```

```{r}
# make a matrix and make the counts be numbers 
#the numbers themnselves are the number of sequencing reads mapped to that gene 
#I THINK higher number = more RNA
counts <- as.matrix(counts)
mode(counts) <- "numeric"

head(counts)

condition <- factor(c(rep("NBF",3),
                      rep("BF3hr",3),
                      rep("BF24hr",3)))

coldata <- data.frame(
  row.names = colnames(counts),
  condition = condition
)


```

```{r}
# Use DESeq2 like in lab 

dds <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = coldata,
  design = ~ condition
)

```


```{r}
#normalized to account for differences in library size
dds_vst1 <- varianceStabilizingTransformation(dds)

```


# PCA Plot

```{r}
#plot 
plotPCA(dds_vst1, intgroup = "condition", ntop = 500)
#distance along X axis here counts double bc it explains more variance 
```

```{r, fig.width=14, fig.height=8}
#make pretty
pca_df <- plotPCA(
  dds_vst1,
  ntop = 500,
  intgroup = c("condition"),
  returnData = TRUE
)

pct_var <- round(100 * attr(pca_df, "percentVar"), 1)
pct_var

ggplot(pca_df) +
  aes(x = PC1, y = PC2, color = condition) +
  geom_point(size = 10) +
  scale_color_brewer(palette = "Dark2", name = "Feeding Treatment") +
  theme_bw(base_size = 26) +
  labs(
    x = paste0("PC1 (", pct_var[1], "%)"),
    y = paste0("PC2 (", pct_var[2], "%)")
  )
```

# Differential expression 

```{r}
#Differentional expression 
dds <- DESeq(dds)
```

## Liz: NBF vs. BF3

```{r}
res_NBF.BF3 <- results(dds, contrast = c("condition", "NBF", "BF3hr"))
head(res_NBF.BF3)
res_NBF.BF3
sum(res_NBF.BF3$padj < 0.05, na.rm = TRUE)
#sum = 3108 --> number of differential expressed genes 
#1 --> filter by values with padj < 0.005, take those, match with orthologs, run David's analysis? 
#2 --> merge this with raw data based on gene ID to get back to orthologs 
```


## Emily: NBF vs. BF24

```{r}
res_NBF.BF24 <- results(dds, contrast = c("condition", "NBF", "BF24hr"))
head(res_NBF.BF24)
res_NBF.BF24
sum(res_NBF.BF24$padj < 0.05, na.rm = TRUE)

```

```{r}
# First convert the results table into a regular data frame:
as.data.frame(res_NBF.BF24) |>
  # Then, only select the rows/genes that are significant:
  filter(padj < 0.05) |>
  # If you run count() on a logical test, you get the nrs. that are FALSE v. TRUE
  dplyr::count(log2FoldChange > 0)
```

```{r}
# Down-regulated (NMBF < BF3):
sum(res_NBF.BF24$log2FoldChange < -1 & res_NBF.BF24$padj < 0.05, na.rm = TRUE)

# Up-regulated (NBF > BF3):
sum(res_NBF.BF24$log2FoldChange > 1 & res_NBF.BF24$padj < 0.05, na.rm = TRUE)

```
```{r, fig.width=12, fig.height=8}
EnhancedVolcano(
  toptable = res_NBF.BF24,      
  title = "Non-blood-fed vs. Bloodfed at 24hr dpi",
  x = "log2FoldChange",     
  y = "padj",             
  lab = rownames(res_NBF.BF24), 
  labSize = 4,               # Now we will show the gene labels
  pCutoff = 10e-10,          # Modify the p-value cut-off
  subtitle = NULL,           # I'll also remove the silly subtitle
  caption = NULL,            # ... and the caption
  )
```

```{r, fig.width=8, fig.height=6, out.width="80%"}
range(res_NBF.BF24$log2FoldChange, na.rm = TRUE)

x <- filter(as.data.frame(res_NBF.BF24$log2FoldChange > 40, na.rm = TRUE))

as.data.frame(res_NBF.BF24) |> filter(log2FoldChange > 14)


# Convert the results table into a regular dataframe and sort by adjusted p-value:
top_df <- data.frame(res_NBF.BF24) |> arrange(padj)

# Gene IDs are the row names: extract the first row name to get the gene of interest
top1 <- rownames(top_df)[1]
top1

# Like with the PlotPCA function, 'intgroup' specifies which variable to use for grouping:
plotCounts(dds, gene = top1, intgroup = "condition")



# Use plotCounts with returnData = TRUE just to get a single-gene count table
# that is suitable for plotting:
focal_gene_counts <- plotCounts(
  dds,
  gene = top1,
  intgroup = c("condition"),
  returnData = TRUE
)

# Take a look at the resulting dataframe:
head(focal_gene_counts)



focal_gene_counts |>
  ggplot(aes(x = condition, y = count, fill = condition)) +
  geom_boxplot(alpha = 0.5, outlier.shape = NA) +
  geom_point(size = 4, shape = 21) +
  theme_bw() +
  labs(
    x = "Feeding Treatment",
    y = "Count"
  ) +
  theme(
    legend.position = "none",
    # Make all text bigger:
    axis.title = element_text(size = 30),
    axis.text = element_text(size = 30),
    plot.title = element_text(size = 30, face = "bold"),
    legend.text = element_text(size = 30),
    legend.title = element_text(size = 30)
  )
```

## Mohamad: BF3 vs. BF24

```{r}
res_BF3.BF24 <- results(dds, contrast = c("condition", "BF3hr", "BF24hr"))
head(res_BF3.BF24)
res_BF3.BF24
sum(res_BF3.BF24$padj < 0.05, na.rm = TRUE)
```

# Heat Maps

## Emily NBF vs. BF24
```{r}
meta <- data.frame(colData(dds))

View(meta)

norm_mat <- assay(dds_vst1)
top25_DE <- rownames(top_df)[1:25]
norm_mat_sel <- norm_mat[match(top25_DE, rownames(norm_mat)), ]
meta_sort <- meta |> arrange(condition)
```

```{r}
# Sort the columns in the normalized count matrix to match the metadata
norm_mat_sel <- norm_mat_sel[, rownames(meta_sort)]


pheatmap(
  norm_mat_sel,
  annotation_col = meta_sort,  # Add the metadata
  cluster_cols = FALSE,        # Don't cluster samples (=columns, cols)
  show_rownames = FALSE,       # Don't show gene names
  scale = "row",               # Perform z-scaling for each gene
)
```

```{r}
top25_hi <- names(sort(rowMeans(norm_mat), decreasing = TRUE)[1:25])
# In the normalized count matrix, select only the genes of interest
norm_mat_sel <- norm_mat[match(top25_hi, rownames(norm_mat)), ]

# Sort the metadata
meta_sort <- meta |>
  arrange(condition) |>
  select(condition)

# Create the heatmap
pheatmap(
  norm_mat_sel,
  annotation_col = meta_sort,
  cluster_cols = FALSE,
  show_rownames = FALSE,
  scale = "row"
)


```

