Set Up
Load Libraries and Set Working Directory
Rename samples
# Define mappings for renaming organs and time point samples
organ_mapping <- c(
amyg = "Amygdala",
drg = "DRG",
heart = "Heart",
hypo = "Hypothalamus",
lhip = "Left_Hippocampus",
musc = "Muscle",
rhip = "Right_Hippocampus",
sc = "Spinal_Cord"
)
time_mapping <- c(
"2" = "2hrs",
"24" = "24hrs",
"72" = "72hrs"
)
Read + combine files
# Read all .txt files in the "miRNA results" folder and combine them into a single dataframe
df <- list.files("miRNA results", pattern = "\\.txt$", full.names = TRUE) %>%
set_names(~ tools::file_path_sans_ext(basename(.))) %>%
map_dfr(~ read_tsv(.x, skip = 2, show_col_types = FALSE), .id = "sample") %>%
select(-c(3, 7, 11)) %>%
mutate(
# Extract organ and timepoint information using regex
organ_code = str_extract(sample, "^[a-zA-Z]+"),
time_code = str_extract(sample, "\\d+$"),
# Map to full names
sample = paste0(organ_mapping[organ_code], "_", time_mapping[time_code])
) %>%
select(-organ_code, -time_code) # Remove intermediate columns for cleanliness
# Confirm successful processing
cat("All files have been read and combined successfully.\n")
All files have been read and combined successfully.
Rename columns
# Clean up column names to remove unwanted suffixes and clarify naming
names(df) <- gsub("\\.\\.\\.[0-9]+$", "", names(df)) # Remove trailing numbers from column names
names(df)[c(3, 5, 7)] <- c("Expr_Log_Ratio_miRNA", "confidence", "Expr_Log_Ratio_mRNA") # Rename relevant columns
df$ID <- sub("^rno-", "", df$ID) # Clean up 'rno-' prefix from miRNA IDs
symbol-pathway mapping
# Check for consistency in Symbol-Pathway mapping
inconsistent <- df %>%
group_by(Symbol) %>%
summarize(unique_pathways = n_distinct(Pathway)) %>%
filter(unique_pathways > 1)
# If the resulting dataframe is empty, there are no inconsistencies
if (nrow(inconsistent) == 0) {
cat("All Symbols have consistent Pathway associations.\n")
} else {
cat("Some Symbols have inconsistent Pathway associations:\n")
print(inconsistent)
}
All Symbols have consistent Pathway associations.
Analysis
Descriptives
# Count and summarize frequency of miRNAs
miRNA_freq <- df %>%
group_by(ID) %>%
summarize(Count = n()) %>%
arrange(desc(Count))
# Number of pathways detected per organ
pathway_freq <- df %>%
group_by(sample) %>%
summarize(Count = n()) %>%
arrange(desc(Count))
# Frequency of each confidence level
confidence_freq <- df %>%
group_by(confidence) %>%
summarize(Count = n()) %>%
arrange(desc(Count))
# Frequency of mRNA symbols and their associated pathways
mRNA_freq <- df %>%
group_by(Symbol, Pathway) %>%
summarize(Count = n(), .groups = "drop") %>%
arrange(desc(Count))
# Count unique miRNA-mRNA pairs
miRNA_mRNA_pairs <- df %>%
group_by(Symbol, ID) %>%
summarize(Frequency = n(), .groups = "drop") %>%
arrange(desc(Frequency))
# Render summary statistics
cat("The number of unique miRNA reviewed is:", length(unique(df$ID)), "\n")
The number of unique miRNA reviewed is: 62
cat("The number of unique mRNA reviewed is:", length(unique(df$Symbol)), "\n")
The number of unique mRNA reviewed is: 318
cat("The number of unique miRNA-mRNA pairs is:", nrow(miRNA_mRNA_pairs), "\n")
The number of unique miRNA-mRNA pairs is: 397
# Render interactive tables for visual inspection
datatable(miRNA_freq, caption = "Frequency of Each miRNA", options = list(pageLength = 10))
datatable(pathway_freq, caption = "Number of Detected Pathways per Organ", options = list(pageLength = 10))
datatable(confidence_freq, caption = "Frequency of Each Confidence Level", options = list(pageLength = 10))
datatable(mRNA_freq, caption = "Frequency of Each mRNA and Associated Pathway", options = list(pageLength = 10))
datatable(miRNA_mRNA_pairs, caption = "Frequency of miRNA-mRNA Pairs", options = list(pageLength = 10))
Visuals
Define plot elements
theme2 <- theme(
axis.text.x = element_text(face = "bold", color = "royalblue4", size = 12, angle = 30, hjust = 1), # Adjust label angle
axis.text.y = element_text(face = "bold", color = "royalblue3", size = 12),
axis.title = element_text(size = 14, face = "bold"),
plot.title = element_text(size = 16, face = "bold.italic")
)
plot top 15 miRNA
# Plot the top 15 most frequent miRNA
miRNA_freq %>%
slice_max(Count, n = 15) %>%
ggplot(aes(x = reorder(ID, -Count), y = Count, fill = Count)) +
geom_col() +
geom_text(aes(label = Count), vjust = -0.3, color = "black", size = 4, fontface = "bold") +
scale_fill_gradient(low = "lightblue", high = "royalblue4") +
labs(
title = "Top 15 Most Frequent miRNA",
x = "miRNA",
y = "Frequency"
) +
theme2 +
scale_y_continuous(
breaks = seq(0, max(miRNA_freq$Count), by = 5),
expand = expansion(mult = c(0, 0.1)) # Add space above the tallest bar
) +
theme(legend.position = "none")

# Plot all mRNA (Symbol) with frequency above 3
mRNA_freq %>%
filter(Count > 3) %>%
ggplot(aes(x = reorder(Symbol, -Count), y = Count, fill = Count)) +
geom_col() +
geom_text(aes(label = Count), vjust = -0.3, color = "black", size = 4, fontface = "bold") +
scale_fill_gradient(low = "lightblue", high = "royalblue4") +
labs(
title = "mRNA with Frequency Above 3",
x = "mRNA",
y = "Frequency"
) +
theme2 +
scale_y_continuous(
breaks = seq(0, max(mRNA_freq$Count), by = 2),
expand = expansion(mult = c(0, 0.1))
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

Experimental Network libraries
#subset df to only right hippocamous 24 hours
df24_rh <- df %>%
filter(sample == "Right_Hippocampus_24hrs")
igraph
# Create an edge list from the subset
edges <- data.frame(from = df24_rh$ID, to = df24_rh$Symbol)
# Create the graph object
network <- graph_from_data_frame(edges)
# visualization
plot(
network,
vertex.size = 10,
vertex.label.cex = 0.8,
vertex.label.color = "black",
vertex.frame.color = NA,
vertex.color = ifelse(V(network)$name %in% df24_rh$ID, "skyblue", "salmon"),
edge.arrow.size = 0.4,
edge.color = "gray",
edge.width = 0.8,
main = "miRNA-mRNA Network (Right Hippocampus, 24hrs)"
)

ggraph
# Create an edge list and graph object
edges <- data.frame(from = df24_rh$ID, to = df24_rh$Symbol)
network <- graph_from_data_frame(edges, directed = FALSE) # Convert to undirected graph
# Detect communities for clustering
communities <- cluster_louvain(network)
V(network)$community <- communities$membership # Assign community memberships to nodes
# Identify node types (miRNA or mRNA)
V(network)$type <- ifelse(V(network)$name %in% df24_rh$ID, "miRNA", "mRNA")
# Visualize the network
ggraph(network, layout = "fr") + # Force-directed layout
geom_edge_link(aes(edge_alpha = 0.5), edge_color = "gray") +
geom_node_point(aes(color = factor(community), shape = type), size = 6) +
geom_node_text(aes(label = name), repel = TRUE, size = 3) +
scale_color_brewer(palette = "Set3") +
scale_shape_manual(values = c("miRNA" = 21, "mRNA" = 19)) +
theme_void() +
labs(
title = "miRNA-mRNA Network (Right Hippocampus, 24hrs)",
color = "Community",
shape = "Node Type"
) +
theme(
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
legend.position = "none"
)

Pathview
library(pathview)
##############################################################################
Pathview is an open source software package distributed under GNU General
Public License version 3 (GPLv3). Details of GPLv3 is available at
http://www.gnu.org/licenses/gpl-3.0.html. Particullary, users are required to
formally cite the original Pathview paper (not just mention it) in publications
or products. For details, do citation("pathview") within R.
The pathview downloads and uses KEGG data. Non-academic uses may require a KEGG
license agreement (details at http://www.kegg.jp/kegg/legal.html).
##############################################################################
# Summarize mRNA frequencies
gene_data <- df24_rh %>%
group_by(Symbol) %>%
summarize(Frequency = n())
# Convert to a named vector
gene_data_vector <- setNames(gene_data$Frequency, gene_data$Symbol)
# Specify the KEGG pathway ID for Inflammatory Mediator Regulation of TRP Channels
pathway_id <- "hsa04750"
# Visualize the pathway
pathview(
gene.data = gene_data_vector,
pathway.id = pathway_id,
species = "hsa",
gene.idtype = "SYMBOL",
limit = list(gene = max(gene_data$Frequency)),
kegg.native = TRUE
)
'select()' returned 1:1 mapping between keys and columns
[1] "Note: 1 of 53 unique input IDs unmapped."
'select()' returned 1:1 mapping between keys and columns
Info: Working in directory /Users/alicewoolard/Library/Mobile Documents/com~apple~CloudDocs
Info: Writing image file hsa04750.pathview.png
---
title: "IPA miRNA-mRNA"
output: html_notebook
---

### Set Up

#### Load Libraries and Set Working Directory
```{r setup, warning=FALSE, message=FALSE, include=FALSE}
rm(list = ls())
library(tidyverse)
library(dplyr)
library(ggraph)
library(igraph)
library(DT)


library(here)
setwd(here())
```

#### Rename samples
```{r}
# Define mappings for renaming organs and time point samples
organ_mapping <- c(
  amyg = "Amygdala",
  drg = "DRG",
  heart = "Heart",
  hypo = "Hypothalamus",
  lhip = "Left_Hippocampus",
  musc = "Muscle",
  rhip = "Right_Hippocampus",
  sc = "Spinal_Cord"
)

time_mapping <- c(
  "2" = "2hrs",
  "24" = "24hrs",
  "72" = "72hrs"
)
```

#### Read + combine files
```{r, warning=FALSE, message=FALSE}
# Read all .txt files in the "miRNA results" folder and combine them into a single dataframe
df <- list.files("miRNA results", pattern = "\\.txt$", full.names = TRUE) %>%
  set_names(~ tools::file_path_sans_ext(basename(.))) %>%
  map_dfr(~ read_tsv(.x, skip = 2, show_col_types = FALSE), .id = "sample") %>%
  select(-c(3, 7, 11)) %>%
  mutate(
    # Extract organ and timepoint information using regex
    organ_code = str_extract(sample, "^[a-zA-Z]+"),
    time_code = str_extract(sample, "\\d+$"),
    # Map to full names
    sample = paste0(organ_mapping[organ_code], "_", time_mapping[time_code])
  ) %>%
  select(-organ_code, -time_code)  # Remove intermediate columns for cleanliness

# Confirm successful processing
cat("All files have been read and combined successfully.\n")
```

#### Rename columns
```{r, warning=FALSE, message=FALSE}
# Clean up column names to remove unwanted suffixes and clarify naming
names(df) <- gsub("\\.\\.\\.[0-9]+$", "", names(df))  # Remove trailing numbers from column names
names(df)[c(3, 5, 7)] <- c("Expr_Log_Ratio_miRNA", "confidence", "Expr_Log_Ratio_mRNA")  # Rename relevant columns
df$ID <- sub("^rno-", "", df$ID)  # Clean up 'rno-' prefix from miRNA IDs
```

#### symbol-pathway mapping
```{r}
# Check for consistency in Symbol-Pathway mapping
inconsistent <- df %>%
  group_by(Symbol) %>%
  summarize(unique_pathways = n_distinct(Pathway)) %>%
  filter(unique_pathways > 1)

# If the resulting dataframe is empty, there are no inconsistencies
if (nrow(inconsistent) == 0) {
  cat("All Symbols have consistent Pathway associations.\n")
} else {
  cat("Some Symbols have inconsistent Pathway associations:\n")
  print(inconsistent)
}
```

### Analysis

#### Descriptives
```{r}
# Count and summarize frequency of miRNAs
miRNA_freq <- df %>%
  group_by(ID) %>%
  summarize(Count = n()) %>%
  arrange(desc(Count))

# Number of pathways detected per organ
pathway_freq <- df %>%
  group_by(sample) %>%
  summarize(Count = n()) %>%
  arrange(desc(Count))

# Frequency of each confidence level
confidence_freq <- df %>%
  group_by(confidence) %>%
  summarize(Count = n()) %>%
  arrange(desc(Count))

# Frequency of mRNA symbols and their associated pathways
mRNA_freq <- df %>%
  group_by(Symbol, Pathway) %>%      
  summarize(Count = n(), .groups = "drop") %>% 
  arrange(desc(Count))

# Count unique miRNA-mRNA pairs
miRNA_mRNA_pairs <- df %>%
  group_by(Symbol, ID) %>%
  summarize(Frequency = n(), .groups = "drop") %>%
  arrange(desc(Frequency))

# Render summary statistics
cat("The number of unique miRNA reviewed is:", length(unique(df$ID)), "\n")
cat("The number of unique mRNA reviewed is:", length(unique(df$Symbol)), "\n")
cat("The number of unique miRNA-mRNA pairs is:", nrow(miRNA_mRNA_pairs), "\n")

# Render interactive tables for visual inspection
datatable(miRNA_freq, caption = "Frequency of Each miRNA", options = list(pageLength = 10))
datatable(pathway_freq, caption = "Number of Detected Pathways per Organ", options = list(pageLength = 10))
datatable(confidence_freq, caption = "Frequency of Each Confidence Level", options = list(pageLength = 10))
datatable(mRNA_freq, caption = "Frequency of Each mRNA and Associated Pathway", options = list(pageLength = 10))
datatable(miRNA_mRNA_pairs, caption = "Frequency of miRNA-mRNA Pairs", options = list(pageLength = 10))
```




#### Visuals

#### Define plot elements
```{r, warning=FALSE, message=FALSE}
theme2 <- theme(
  axis.text.x = element_text(face = "bold", color = "royalblue4", size = 12, angle = 30, hjust = 1),  # Adjust label angle
  axis.text.y = element_text(face = "bold", color = "royalblue3", size = 12),
  axis.title = element_text(size = 14, face = "bold"),
  plot.title = element_text(size = 16, face = "bold.italic")
)
```

#### plot top 15 miRNA
```{r}
# Plot the top 15 most frequent miRNA
miRNA_freq %>%
  slice_max(Count, n = 15) %>%
  ggplot(aes(x = reorder(ID, -Count), y = Count, fill = Count)) + 
  geom_col() +
  geom_text(aes(label = Count), vjust = -0.3, color = "black", size = 4, fontface = "bold") +  
  scale_fill_gradient(low = "lightblue", high = "royalblue4") + 
  labs(
    title = "Top 15 Most Frequent miRNA",
    x = "miRNA",
    y = "Frequency"
  ) +
  theme2 +
  scale_y_continuous(
    breaks = seq(0, max(miRNA_freq$Count), by = 5), 
    expand = expansion(mult = c(0, 0.1))            # Add space above the tallest bar
  ) +
  theme(legend.position = "none")

# Plot all mRNA (Symbol) with frequency above 3
mRNA_freq %>%
  filter(Count > 3) %>%
  ggplot(aes(x = reorder(Symbol, -Count), y = Count, fill = Count)) + 
  geom_col() +
  geom_text(aes(label = Count), vjust = -0.3, color = "black", size = 4, fontface = "bold") + 
  scale_fill_gradient(low = "lightblue", high = "royalblue4") +  
  labs(
    title = "mRNA with Frequency Above 3",
    x = "mRNA",
    y = "Frequency"
  ) +
  theme2 +
  scale_y_continuous(
    breaks = seq(0, max(mRNA_freq$Count), by = 2),
    expand = expansion(mult = c(0, 0.1))          
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

### Experimental Network libraries
```{r}
#subset df to only right hippocamous 24 hours
df24_rh <- df %>%
  filter(sample == "Right_Hippocampus_24hrs")
```

#### igraph
```{r}
# Create an edge list from the subset
edges <- data.frame(from = df24_rh$ID, to = df24_rh$Symbol)

# Create the graph object
network <- graph_from_data_frame(edges)

# visualization
plot(
  network,
  vertex.size = 10,                 
  vertex.label.cex = 0.8,          
  vertex.label.color = "black",    
  vertex.frame.color = NA,         
  vertex.color = ifelse(V(network)$name %in% df24_rh$ID, "skyblue", "salmon"),  
  edge.arrow.size = 0.4,            
  edge.color = "gray",          
  edge.width = 0.8,              
  main = "miRNA-mRNA Network (Right Hippocampus, 24hrs)"
)
```
#### ggraph
```{r}
# Create an edge list and graph object
edges <- data.frame(from = df24_rh$ID, to = df24_rh$Symbol)
network <- graph_from_data_frame(edges, directed = FALSE)  # Convert to undirected graph

# Detect communities for clustering
communities <- cluster_louvain(network)
V(network)$community <- communities$membership  # Assign community memberships to nodes

# Identify node types (miRNA or mRNA)
V(network)$type <- ifelse(V(network)$name %in% df24_rh$ID, "miRNA", "mRNA")

# Visualize the network
ggraph(network, layout = "fr") +  # Force-directed layout
  geom_edge_link(aes(edge_alpha = 0.5), edge_color = "gray") +  
  geom_node_point(aes(color = factor(community), shape = type), size = 6) +  
  geom_node_text(aes(label = name), repel = TRUE, size = 3) + 
  scale_color_brewer(palette = "Set3") +  
  scale_shape_manual(values = c("miRNA" = 21, "mRNA" = 19)) + 
  theme_void() +  
  labs(
    title = "miRNA-mRNA Network (Right Hippocampus, 24hrs)",
    color = "Community",
    shape = "Node Type"
  )  +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    legend.position = "none" 
  )
```

#### Pathview
```{r}
library(pathview)

# Summarize mRNA frequencies
gene_data <- df24_rh %>%
  group_by(Symbol) %>%
  summarize(Frequency = n())

# Convert to a named vector
gene_data_vector <- setNames(gene_data$Frequency, gene_data$Symbol)

# Specify the KEGG pathway ID for Inflammatory Mediator Regulation of TRP Channels
pathway_id <- "hsa04750"

# Visualize the pathway
pathview(
  gene.data = gene_data_vector,
  pathway.id = pathway_id,
  species = "hsa",         
  gene.idtype = "SYMBOL", 
  limit = list(gene = max(gene_data$Frequency)),
  kegg.native = TRUE    
)
```


