# version 10 of the app
# includes conversion of FASTA file and a new tab that can convert .stru files
# SNP matrix generation
# PCA computation and plotting 
# Download buttons for both PCA results and SNP matrix 
# SNP matrix preview in the output
# cancel option
# pre-upload check of valid fasta data 

# Increase upload size limit to 400 MB
options(shiny.maxRequestSize = 400 * 1024^2)

# Load required libraries
library(shiny)
library(Biostrings)
library(ggplot2)

# ---------- Helper Function for FASTA ----------
create_snp_matrix <- function(fasta, cancel_check = NULL) {
  seqs <- as.character(fasta)
  
  # Remove empty or NA sequences
  valid <- nzchar(seqs)
  if (!all(valid)) {
    warning("Some sequences are empty and were excluded.")
    seqs <- seqs[valid]
  }
  
  # Check for equal length
  seq_lengths <- nchar(seqs)
  if (length(unique(seq_lengths)) != 1) {
    stop("FASTA sequences are not all the same length. Ensure they are aligned.")
  }
  
  total <- length(seqs)
  seq_len <- seq_lengths[1]
  seq_matrix <- matrix(NA, nrow = total, ncol = seq_len)
  
  for (i in seq_along(seqs)) {
    if (!is.null(cancel_check) && cancel_check()) {
      message("Processing cancelled by user.")
      return(NULL)
    }
    split_seq <- strsplit(seqs[i], "")[[1]]
    if (length(split_seq) != seq_len) {
      stop("Inconsistent sequence length found at position ", i)
    }
    seq_matrix[i, ] <- split_seq
  }
  
  snp_cols <- apply(seq_matrix, 2, function(col) length(unique(col)) > 1)
  snp_matrix <- seq_matrix[, snp_cols, drop = FALSE]
  
  base_to_num <- function(base) match(base, c("A", "C", "G", "T", "N"))
  snp_numeric <- apply(snp_matrix, c(1, 2), base_to_num)
  snp_numeric[is.na(snp_numeric)] <- 0
  
  return(snp_numeric)
}

# ---------- UI ----------
ui <- fluidPage(
  titlePanel("SNP PCA Analyzer"),
  
  tabsetPanel(
    tabPanel("FASTA File",
             sidebarLayout(
               sidebarPanel(
                 fileInput("fasta_file", "Upload FASTA File", accept = c(".fasta", ".fa")),
                 helpText("Upload aligned DNA sequences in FASTA format."),
                 actionButton("cancel", "Cancel Processing", class = "btn-danger"),
                 downloadButton("download_pca", "Download PCA Results"),
                 downloadButton("download_snp", "Download SNP Matrix"),
                 textOutput("fasta_validation_status")
               ),
               mainPanel(
                 plotOutput("pca_plot"),
                 verbatimTextOutput("snp_summary")
               )
             )
    ),
    
    tabPanel("STRU File",
             sidebarLayout(
               sidebarPanel(
                 fileInput("stru_file", "Upload .stru File", accept = c(".stru", ".txt")),
                 helpText("Upload a STRUCTURE-format SNP file (numeric format)."),
                 actionButton("cancel_stru", "Cancel Processing", class = "btn-danger"),
                 downloadButton("download_pca_stru", "Download PCA Results"),
                 downloadButton("download_snp_stru", "Download SNP Matrix")
               ),
               mainPanel(
                 plotOutput("pca_plot_stru"),
                 verbatimTextOutput("snp_summary_stru")
               )
             )
    )
  )
)

# ---------- Server ----------
server <- function(input, output) {
  
  # Cancel flags
  cancel_flag <- reactiveVal(FALSE)
  cancel_flag_stru <- reactiveVal(FALSE)
  
  observeEvent(input$cancel, {
    cancel_flag(TRUE)
  })
  
  observeEvent(input$cancel_stru, {
    cancel_flag_stru(TRUE)
  })
  
  # FASTA Reactives
  fasta_data <- reactive({
    req(input$fasta_file)
    fasta <- readDNAStringSet(input$fasta_file$datapath)
    
    # Validate FASTA file before processing
    seqs <- as.character(fasta)
    seq_lengths <- nchar(seqs)
    
    # Check if all sequences are of the same length
    if (length(unique(seq_lengths)) != 1) {
      stop("FASTA sequences are not all the same length. Please ensure they are aligned.")
    }
    
    # Update validation status
    output$fasta_validation_status <- renderText({
      "FASTA file validated: All sequences are of equal length."
    })
    
    return(fasta)
  })
  
  snp_matrix <- reactive({
    fasta <- fasta_data()
    cancel_flag(FALSE)
    
    withProgress(message = "Processing FASTA file...", value = 0.5, {
      result <- create_snp_matrix(fasta, cancel_check = cancel_flag)
      if (is.null(result)) {
        showNotification("FASTA processing cancelled.", type = "warning")
      }
      return(result)
    })
  })
  
  output$snp_summary <- renderPrint({
    matrix <- snp_matrix()
    req(!is.null(matrix))
    cat("SNP matrix dimensions:\n")
    print(dim(matrix))
    cat("\nPreview (first 5 rows/columns):\n")
    print(matrix[1:min(5, nrow(matrix)), 1:min(5, ncol(matrix))])
  })
  
  output$pca_plot <- renderPlot({
    matrix <- snp_matrix()
    req(!is.null(matrix))
    validate(
      need(ncol(matrix) >= 2, "Too few SNP columns for PCA."),
      need(nrow(matrix) >= 2, "Too few sequences for PCA.")
    )
    pca <- prcomp(matrix, scale. = TRUE)
    
    df <- as.data.frame(pca$x[, 1:2])
    df$sample <- rownames(df)
    
    ggplot(df, aes(x = PC1, y = PC2)) +
      geom_point(color = "blue", size = 3) +
      labs(title = "PCA of SNP Matrix (FASTA)", x = "PC1", y = "PC2") +
      theme_minimal()
  })
  
  output$download_pca <- downloadHandler(
    filename = function() paste0("PCA_results_FASTA_", Sys.Date(), ".csv"),
    content = function(file) {
      matrix <- snp_matrix()
      req(!is.null(matrix))
      pca <- prcomp(matrix, scale. = TRUE)
      write.csv(pca$x, file)
    }
  )
  
  output$download_snp <- downloadHandler(
    filename = function() paste0("SNP_matrix_FASTA_", Sys.Date(), ".csv"),
    content = function(file) {
      matrix <- snp_matrix()
      req(!is.null(matrix))
      rownames(matrix) <- names(fasta_data())
      write.csv(matrix, file)
    }
  )
  
  # STRU Reactives
  stru_matrix <- reactive({
    req(input$stru_file)
    cancel_flag_stru(FALSE)
    
    withProgress(message = "Processing STRU file...", value = 0.5, {
      raw <- read.table(input$stru_file$datapath, header = FALSE, stringsAsFactors = FALSE)
      
      if (nrow(raw) %% 2 != 0) {
        stop("STRU file must have an even number of rows (2 per individual).")
      }
      
      meta_cols <- 2
      data_only <- raw[, -(1:meta_cols)]
      
      combined <- matrix(NA, nrow = nrow(data_only) / 2, ncol = ncol(data_only))
      for (i in seq(1, nrow(data_only), by = 2)) {
        if (cancel_flag_stru()) {
          showNotification("STRU processing cancelled.", type = "warning")
          return(NULL)
        }
        row1 <- data_only[i, ]
        row2 <- data_only[i + 1, ]
        combined[(i + 1) / 2, ] <- paste0(row1, row2)
      }
      
      snp_numeric <- apply(combined, 2, as.numeric)
      return(snp_numeric)
    })
  })
  
  output$snp_summary_stru <- renderPrint({
    matrix <- stru_matrix()
    req(!is.null(matrix))
    cat("STRU matrix dimensions:\n")
    print(dim(matrix))
    cat("\nPreview (first 5 rows/columns):\n")
    print(matrix[1:min(5, nrow(matrix)), 1:min(5, ncol(matrix))])
  })
  
  output$pca_plot_stru <- renderPlot({
    matrix <- stru_matrix()
    req(!is.null(matrix))
    validate(
      need(ncol(matrix) >= 2, "Too few SNP columns for PCA."),
      need(nrow(matrix) >= 2, "Too few individuals for PCA.")
    )
    pca <- prcomp(matrix, scale. = TRUE)
    
    df <- as.data.frame(pca$x[, 1:2])
    df$sample <- rownames(df)
    
    ggplot(df, aes(x = PC1, y = PC2)) +
      geom_point(color = "darkgreen", size = 3) +
      labs(title = "PCA of SNP Matrix (.stru)", x = "PC1", y = "PC2") +
      theme_minimal()
  })
  
  output$download_pca_stru <- downloadHandler(
    filename = function() paste0("PCA_results_STRU_", Sys.Date(), ".csv"),
    content = function(file) {
      matrix <- stru_matrix()
      req(!is.null(matrix))
      pca <- prcomp(matrix, scale. = TRUE)
      write.csv(pca$x, file)
    }
  )
  
  output$download_snp_stru <- downloadHandler(
    filename = function() paste0("SNP_matrix_STRU_", Sys.Date(), ".csv"),
    content = function(file) {
      matrix <- stru_matrix()
      req(!is.null(matrix))
      write.csv(matrix, file)
    }
  )
}

# ---------- Run App ----------
shinyApp(ui = ui, server = server)
Shiny applications not supported in static R Markdown documents