# version 10 of the app
# includes conversion of FASTA file and a new tab that can convert .stru files
# SNP matrix generation
# PCA computation and plotting
# Download buttons for both PCA results and SNP matrix
# SNP matrix preview in the output
# cancel option
# pre-upload check of valid fasta data
# Increase upload size limit to 400 MB
options(shiny.maxRequestSize = 400 * 1024^2)
# Load required libraries
library(shiny)
library(Biostrings)
library(ggplot2)
# ---------- Helper Function for FASTA ----------
create_snp_matrix <- function(fasta, cancel_check = NULL) {
seqs <- as.character(fasta)
# Remove empty or NA sequences
valid <- nzchar(seqs)
if (!all(valid)) {
warning("Some sequences are empty and were excluded.")
seqs <- seqs[valid]
}
# Check for equal length
seq_lengths <- nchar(seqs)
if (length(unique(seq_lengths)) != 1) {
stop("FASTA sequences are not all the same length. Ensure they are aligned.")
}
total <- length(seqs)
seq_len <- seq_lengths[1]
seq_matrix <- matrix(NA, nrow = total, ncol = seq_len)
for (i in seq_along(seqs)) {
if (!is.null(cancel_check) && cancel_check()) {
message("Processing cancelled by user.")
return(NULL)
}
split_seq <- strsplit(seqs[i], "")[[1]]
if (length(split_seq) != seq_len) {
stop("Inconsistent sequence length found at position ", i)
}
seq_matrix[i, ] <- split_seq
}
snp_cols <- apply(seq_matrix, 2, function(col) length(unique(col)) > 1)
snp_matrix <- seq_matrix[, snp_cols, drop = FALSE]
base_to_num <- function(base) match(base, c("A", "C", "G", "T", "N"))
snp_numeric <- apply(snp_matrix, c(1, 2), base_to_num)
snp_numeric[is.na(snp_numeric)] <- 0
return(snp_numeric)
}
# ---------- UI ----------
ui <- fluidPage(
titlePanel("SNP PCA Analyzer"),
tabsetPanel(
tabPanel("FASTA File",
sidebarLayout(
sidebarPanel(
fileInput("fasta_file", "Upload FASTA File", accept = c(".fasta", ".fa")),
helpText("Upload aligned DNA sequences in FASTA format."),
actionButton("cancel", "Cancel Processing", class = "btn-danger"),
downloadButton("download_pca", "Download PCA Results"),
downloadButton("download_snp", "Download SNP Matrix"),
textOutput("fasta_validation_status")
),
mainPanel(
plotOutput("pca_plot"),
verbatimTextOutput("snp_summary")
)
)
),
tabPanel("STRU File",
sidebarLayout(
sidebarPanel(
fileInput("stru_file", "Upload .stru File", accept = c(".stru", ".txt")),
helpText("Upload a STRUCTURE-format SNP file (numeric format)."),
actionButton("cancel_stru", "Cancel Processing", class = "btn-danger"),
downloadButton("download_pca_stru", "Download PCA Results"),
downloadButton("download_snp_stru", "Download SNP Matrix")
),
mainPanel(
plotOutput("pca_plot_stru"),
verbatimTextOutput("snp_summary_stru")
)
)
)
)
)
# ---------- Server ----------
server <- function(input, output) {
# Cancel flags
cancel_flag <- reactiveVal(FALSE)
cancel_flag_stru <- reactiveVal(FALSE)
observeEvent(input$cancel, {
cancel_flag(TRUE)
})
observeEvent(input$cancel_stru, {
cancel_flag_stru(TRUE)
})
# FASTA Reactives
fasta_data <- reactive({
req(input$fasta_file)
fasta <- readDNAStringSet(input$fasta_file$datapath)
# Validate FASTA file before processing
seqs <- as.character(fasta)
seq_lengths <- nchar(seqs)
# Check if all sequences are of the same length
if (length(unique(seq_lengths)) != 1) {
stop("FASTA sequences are not all the same length. Please ensure they are aligned.")
}
# Update validation status
output$fasta_validation_status <- renderText({
"FASTA file validated: All sequences are of equal length."
})
return(fasta)
})
snp_matrix <- reactive({
fasta <- fasta_data()
cancel_flag(FALSE)
withProgress(message = "Processing FASTA file...", value = 0.5, {
result <- create_snp_matrix(fasta, cancel_check = cancel_flag)
if (is.null(result)) {
showNotification("FASTA processing cancelled.", type = "warning")
}
return(result)
})
})
output$snp_summary <- renderPrint({
matrix <- snp_matrix()
req(!is.null(matrix))
cat("SNP matrix dimensions:\n")
print(dim(matrix))
cat("\nPreview (first 5 rows/columns):\n")
print(matrix[1:min(5, nrow(matrix)), 1:min(5, ncol(matrix))])
})
output$pca_plot <- renderPlot({
matrix <- snp_matrix()
req(!is.null(matrix))
validate(
need(ncol(matrix) >= 2, "Too few SNP columns for PCA."),
need(nrow(matrix) >= 2, "Too few sequences for PCA.")
)
pca <- prcomp(matrix, scale. = TRUE)
df <- as.data.frame(pca$x[, 1:2])
df$sample <- rownames(df)
ggplot(df, aes(x = PC1, y = PC2)) +
geom_point(color = "blue", size = 3) +
labs(title = "PCA of SNP Matrix (FASTA)", x = "PC1", y = "PC2") +
theme_minimal()
})
output$download_pca <- downloadHandler(
filename = function() paste0("PCA_results_FASTA_", Sys.Date(), ".csv"),
content = function(file) {
matrix <- snp_matrix()
req(!is.null(matrix))
pca <- prcomp(matrix, scale. = TRUE)
write.csv(pca$x, file)
}
)
output$download_snp <- downloadHandler(
filename = function() paste0("SNP_matrix_FASTA_", Sys.Date(), ".csv"),
content = function(file) {
matrix <- snp_matrix()
req(!is.null(matrix))
rownames(matrix) <- names(fasta_data())
write.csv(matrix, file)
}
)
# STRU Reactives
stru_matrix <- reactive({
req(input$stru_file)
cancel_flag_stru(FALSE)
withProgress(message = "Processing STRU file...", value = 0.5, {
raw <- read.table(input$stru_file$datapath, header = FALSE, stringsAsFactors = FALSE)
if (nrow(raw) %% 2 != 0) {
stop("STRU file must have an even number of rows (2 per individual).")
}
meta_cols <- 2
data_only <- raw[, -(1:meta_cols)]
combined <- matrix(NA, nrow = nrow(data_only) / 2, ncol = ncol(data_only))
for (i in seq(1, nrow(data_only), by = 2)) {
if (cancel_flag_stru()) {
showNotification("STRU processing cancelled.", type = "warning")
return(NULL)
}
row1 <- data_only[i, ]
row2 <- data_only[i + 1, ]
combined[(i + 1) / 2, ] <- paste0(row1, row2)
}
snp_numeric <- apply(combined, 2, as.numeric)
return(snp_numeric)
})
})
output$snp_summary_stru <- renderPrint({
matrix <- stru_matrix()
req(!is.null(matrix))
cat("STRU matrix dimensions:\n")
print(dim(matrix))
cat("\nPreview (first 5 rows/columns):\n")
print(matrix[1:min(5, nrow(matrix)), 1:min(5, ncol(matrix))])
})
output$pca_plot_stru <- renderPlot({
matrix <- stru_matrix()
req(!is.null(matrix))
validate(
need(ncol(matrix) >= 2, "Too few SNP columns for PCA."),
need(nrow(matrix) >= 2, "Too few individuals for PCA.")
)
pca <- prcomp(matrix, scale. = TRUE)
df <- as.data.frame(pca$x[, 1:2])
df$sample <- rownames(df)
ggplot(df, aes(x = PC1, y = PC2)) +
geom_point(color = "darkgreen", size = 3) +
labs(title = "PCA of SNP Matrix (.stru)", x = "PC1", y = "PC2") +
theme_minimal()
})
output$download_pca_stru <- downloadHandler(
filename = function() paste0("PCA_results_STRU_", Sys.Date(), ".csv"),
content = function(file) {
matrix <- stru_matrix()
req(!is.null(matrix))
pca <- prcomp(matrix, scale. = TRUE)
write.csv(pca$x, file)
}
)
output$download_snp_stru <- downloadHandler(
filename = function() paste0("SNP_matrix_STRU_", Sys.Date(), ".csv"),
content = function(file) {
matrix <- stru_matrix()
req(!is.null(matrix))
write.csv(matrix, file)
}
)
}
# ---------- Run App ----------
shinyApp(ui = ui, server = server)
Shiny applications not supported in static R Markdown documents