Calcium Imaging Analysis Code


# Housekeeping: remove all data in the global environment, but leave all functions defined

rm(list = setdiff(ls(), lsf.str()))



# Set Working Directory/ Load All Packages/ Set Memory ####


# Defines the working directory as the folder in which all data files are located
# For this script to work, place the script file in the root folder and double click to open (DO NOT DRAG AND DROP INTO R STUDIO)
# For Recursive processing, the R.script must be placed in the root folder containing all other folders
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

library(rstudioapi)
library(xlsx)
library(xlsxjars)
library(rJava)
library(stringr)
library(gplots)
library(ggplot2)
library(plyr)
library(XLConnect)
library(XLConnectJars)
library(xtable)
library(tcltk)
library(tcltk2)
library(MESS)
library(gridExtra)
library(gridBase)
library(zoo)
library(grid)

# Change default Java memory to 4 Gigabytes 
options(java.parameters = "-Xmx4g")

##########################################################


# Set all functions ####

argmax <- function(x, y, w=w, ...) {
  require(zoo)
  n <- length(y)
  y.smooth <<- loess(y ~ x, ...)$fitted
  y.max <- rollapply(zoo(y.smooth), 2*w+1, max, align="center")
  delta <- y.max - y.smooth[-c(1:w, n+1-1:w)]
  i.max <- which(delta <= 0) + w
  list(x=x[i.max], i=i.max, y.hat=y.smooth)
}

argmin <- function(x, y, w=w, ...) {
  require(zoo)
  n <- length(y)
  y.smooth <- loess(y ~ x, ...)$fitted
  y.min <- rollapply(zoo(y.smooth), 2*w+1, min, align="center")
  delta <- y.min - y.smooth[-c(1:w, n+1-1:w)]
  i.min <- which(delta >= 0) + w
  list(x=x[i.min], i=i.min, y.hat=y.smooth)
}


smooth_Normalization <- function(x,y,w,span) {
  

  w=1
  span=.25
  y=y
  x=x
  
  peaks <- argmax(x, y, w=w, span=span )
  nadirs <- argmin(x, y, w=w, span=span)
  
  ## take an average of all minima points   
  baseline_smooth_curve<-c(y.smooth)
  
  
  # normalize current cell's y values by dividing by the corresponding near perfectly smooth fit value
  
  normalizing<<-data.frame(y,baseline_smooth_curve)
  normalizing$norm_y<-normalizing$y/normalizing$baseline_smooth_curve
  norm_y<<-normalizing$norm_y
  
}






test <- function(w, span, thres) {

## smooth the data
      
  peaks <- argmax(x, y, w=w, span=span )
  nadirs <- argmin(x, y, w=w, span=span)
  
  sizes = sapply(peaks$i, function(peak.i) {
    nadir.i = nadirs$i[max(which(nadirs$i < peak.i))] 
    size = peaks$y.hat[peak.i] - nadirs$y.hat[nadir.i]
    # nadirs.yhat<<-nadirs$y.hat
    
    # I Added this line as a "conditional argument for missing values": size is now always numeric to avoid problems when 'calling' "[sizes > thres]"
    if(is.na(size)){size = 0}
    return(size)
  })
  
  
  
  # This returns the first & last peak, which is important when calculating frequency
  # infinite (missing) values are converted to NA to assist in table cleanup
  
  first.peakx <- min(peaks$x[sizes > thres], na.rm=T)
  if(!is.infinite(first.peakx)){first.peakx <- first.peakx}else{first.peakx = NA}
  
  last.peakx <- max(peaks$x[sizes > thres], na.rm=T)
  if(!is.infinite(last.peakx)){last.peakx <- last.peakx}else{last.peakx = NA}
  
  first.pre.troughX<-x[nadirs$i][max(which(x[nadirs$i]<first.peakx))]
  if(!is.infinite(first.pre.troughX)){first.pre.troughX <- first.pre.troughX}else{first.pre.troughX = NA}  
  
  ######
  # Calculating Average Peak Durations & last.post.troughX: (i.e the x value of local minima following the final peak)
  
  peakdur <- 0
  
  preT.vector<-c()
  
  if(length(sizes[sizes>thres])>0){
    for(Q in 1:length(sizes[sizes>thres])){
      
      if(Q==1){
        preT <- x[nadirs$i][max(which(x[nadirs$i]<peaks$x[sizes > thres][Q]))]
        posT <- x[nadirs$i][min(which(x[nadirs$i]>peaks$x[sizes > thres][Q]))]
        
        preT.vector<-c(preT) 
        
        if(!is.na(preT) & !is.na(posT))
        {
          peakdur <- peakdur + (posT - preT)
        }
        
        
      }
      
      if(Q>1){
        preT<-x[nadirs$i][max(which(x[nadirs$i]<peaks$x[sizes > thres][Q]))]
        posT<-x[nadirs$i][min(which(x[nadirs$i]>peaks$x[sizes > thres][Q]))]
        
        
        if( preT < peaks$x[sizes > thres][Q-1]
            # preT==x[nadirs$i][max(which(x[nadirs$i]<peaks$x[sizes > thres][Q-1]))] & posT==x[nadirs$i][min(which(x[nadirs$i]>peaks$x[sizes > thres][Q-1]))]  
        )
          
          
        { 
          peakdur<-peakdur
          
          
          # For the below code, here is what we aim to do:
          # if there is a preT trough, and it defines peak#1, and there exists Peak#2 right next to it:
          # However there is no PreT trough between these peaks, and the only reason why Peak#2 is considered a peak, is
          # Due to the fact that the "max nadir (trough) < this peak's x location" IS THE SAME AS THE TROUGH USED TO DEFINE PEAK#1 (which would otherwise not happen if a trough existed between peaks 1 & 2)
          # Therefore, the if statement above " if( preT < peaks$x[sizes > thres][Q-1]) " states that if preT for the current peak (peaks$x[sizes>thres][Q]) is less than the X value OF THE PREVIOUS PEAK, Don't Change Peak Duration
          # However, in the rare cases where multiple side by side peaks are defined without a trough between them:
          # a logical extra line of code would be ideal for REMOVING the current peaks Y value (i.e. sizes[sizes>thres][Q]) FROM THE COLLECTION OF PEAKS (i.e. sizes[sizes>thres])  
          
          # sizes<-subset(sizes, select = -sizes[sizes>thres])  # <- this is needs edits
          
          
          
        }  else {
          preT=preT
          posT=posT
          
          if(!is.na(preT) & !is.na(posT))
          {
            peakdur <- peakdur + (posT - preT)
          }
          preT.vector<-c(preT.vector, preT)                
        }               
      }
    }
    
    
    
    last.post.troughX<-x[nadirs$i][min(which(x[nadirs$i]>peaks$x[sizes > thres][length(sizes[sizes>thres])]))]
    if(is.na(last.post.troughX)){
      
      last.post.troughX=x[length(x)]
      peakdur<-peakdur + (last.post.troughX - preT.vector[length(preT.vector)])
      
    } else {
      last.post.troughX<-last.post.troughX
      peakdur<-peakdur
    }
    
    avgpeakdur <- peakdur/length(sizes[sizes>thres])
    
    
  } else { 
    peakdur = NA
    avgpeakdur = NA 
    last.post.troughX = NA
  }
  
  
  
  # Calculating First and Maximum Amplitudes respectively 
  # Infinite (missing) values are converted to "NA" for subsequent table cleanup 
  
  first.amp <- sizes[sizes > thres][1]
  
  max.amp <- max(sizes[sizes > thres])      
  
  if(!is.infinite(max.amp)){max.amp = max.amp}else{max.amp = NA}
  
  # This helps when organizing monotonics, which have an amp but no freq;; cells with freq have both)
  
  # Setting up an easily accessible table to use when calculating AUC   
  # This helps when organizing monotonics, which have an amp but no freq;; cells with freq have both)
  
  x = as.numeric(sub("^X","",rownames(data)))
  
  
  # Setting up an easily accessible table to use when calculating AUC   
  
  xy.table<-data.frame(x,y)
  
  # Define the local minimum value x prior to first peak (per each cell; if non-existent = NA)  
  # this value stands for a specific time, and can also be used for raw data AUC calculation
  
  x_initial<-as.numeric(first.pre.troughX)
  
  # Define the y value on Normalized data which corresponds to this X(time)_initial
  
  y_initial<-xy.table$y[xy.table$x==first.pre.troughX]
  
  # Transform by subtraction: y = y-y_initial (i.e. initial y value becomes 0, subsequent values are y - y_initial)
  
  y_sub<-y-y_initial
  
  ## If curve becomes negative, change value to 0 
  # (disclude from AUC integration' as alternate solution to adding negatives or adding absolute value of negatives)
  
  y_sub_zero<-y_sub
  y_sub_zero[y_sub_zero<0]<-0
  
  # integrate newly formed y_sub vectors into xy.table      
  xy.table$y_sub<-y_sub
  xy.table$y_sub_zero<-y_sub_zero
  
  # make a base column of 0, helps with aesthetics when shading plots for AUC
  xy.table$Base0<-0
  
  
  # # Incorporate column for Raw data y values 
  # xy.table$Raw.y<-Raw.y
  # 
  # # Define the Y Value in Raw data which corresponds to this X_initial
  # Raw.y_initial<-xy.table$Raw.y[xy.table$x==x_initial]
  # 
  # # Transform Raw data (Raw.y - Raw.y_initial)  
  # Raw.y_sub<-Raw.y - Raw.y_initial
  # 
  # # Change negative transformed Raw Data values to 0 
  # Raw.y_sub_zero<-Raw.y_sub
  # Raw.y_sub_zero[Raw.y_sub_zero<0]<-0
  # 
  # # incorporate these columns in the xy.table 
  # xy.table$Raw.y_sub<-Raw.y_sub
  # xy.table$Raw.y_sub_zero<-Raw.y_sub_zero
  
  # subset responsive portion of the data for shading   
  # shade.table<-xy.table[xy.table$x>=x_initial,]
  # shade.table<-shade.table[shade.table$x<=last.post.troughX,]
  # 
  # Calculating number of peaks, frequency, and AUC   
  if(any(sizes>thres, na.rm = TRUE)) {
    numpeaks <- sum(sizes > thres, na.rm = TRUE)
    average.amp<-(sum(sizes[sizes>thres]))/(numpeaks)
    Duration<- last.post.troughX - first.pre.troughX
    Total_Image_Time<-x[length(x)]
    
    # AUC calculations   
    Norm.AUC<-auc(x,y_sub, from = x_initial, to = last.post.troughX)
    # Raw.AUC.with.negs<-auc(x,Raw.y_sub, from = x_initial, to = last.post.troughX)
    # Raw.AUC<-auc(x,Raw.y_sub_zero, from = x_initial, to = last.post.troughX)
    
    if(numpeaks>1) {
      freq <- (numpeaks/(Duration))*1000
      Duty.cycle <- avgpeakdur/Duration
    } else {
      freq = NA
      Duty.cycle = NA
    }
  } else {
    numpeaks = NA
    average.amp = NA
    Duration = NA
    freq = NA
    Duty.cycle = NA
    Total_Image_Time = NA
    Norm.AUC = NA
    # Raw.AUC.with.negs = NA
    # Raw.AUC = NA
  }
  
  #############################################
  # Line Plot of Raw and/or smoothed y values over time(x)
  
  # Resetting ymin and ymax to define plot window boundaries/scales for smoothed/normalized curve 
  ymin<-min(y)
  ymax <-max(y)
  
  
  # set up plot window bounds/scale
  plot(x,y, cex=0.15, col="Gray", xlab = "Time (Seconds)", ylab = "Fold change from baseline", main=paste("Smoothed Curve of Normalized Data-SEM Shaded" ,"\n", "w = ", w, ", span = ", span, ", thres = ", thres, sep=""),ylim = c(min(y)-.25, max(y)+.25))
  
 y.hat<<-y.smooth
  
  # plot line for smoothed data
  lines(x, peaks$y.hat,  lwd=.5,col="darkblue") 
  
  # Show x,y values for local maxima, minima, and maxima>thres on smoothed curve 
  points(x[peaks$i], peaks$y.hat[peaks$i], col="Red", pch=19, cex=.25)
  points(x[nadirs$i], nadirs$y.hat[nadirs$i], col="Green", pch=19, cex=.25)
  points(x[peaks$i[sizes > thres]], nadirs$y.hat[peaks$i[sizes > thres]], col="Gold2", pch=19, cex=1)
  
  # Show red line from time(x) of first peak(y) to time(x) of final peak(y) 
  
  lines(c(first.peakx, last.peakx), c(max(peaks$y.hat)+.1,max(peaks$y.hat)+.1), col="red")
  abline(v=c(stimTime_string), col="blue",lwd=c(Stim_line_lwd))
  # Show Duration line from first.pre.troughX to last post.troughX: 
  # (refers to minima before first (or only) peak, and final post-peak minima time(x values respecte)
  
  lines(c(first.pre.troughX, last.post.troughX ), c(min(peaks$y.hat)-.1,min(peaks$y.hat)-.1), col="Purple")
  
  # Set legend
  
  # legend("topright", legend = paste("Total peaks = ", numpeaks, "\n freq = ", round(freq,digits=1), "mHz"), cex = 0.5)  


    
  
  ########################
  # AUC plot: for Raw Data (WITH NEGATIVES)
  
  # setting ymin and ymax to define plot boundaries/ scale for the AUC plot window
  # ymin<-min(Raw.y_sub)
  # ymax <- max(Raw.y_sub)
  
  # Create Area under the Curve plot (with soon to be shaded region)
  # This will be the second plot: presented directly below the raw data plot called first in percellwrapper:
  # I.e the first plot was generated by calling percellwrapper, which then calls this (test) function) 
  
  # making an initial plot window with all desired parameters and text  
  # if(any(sizes>thres, na.rm = TRUE)){  
  #   plot(x, Raw.y_sub, 
  #        cex=0.25, 
  #        col="Gray",
  #        xlab = "Time (Seconds)",
  #        ylab = "Mean Grey Pixel Intensity (Arbitrary Units)", 
  #        ylim = c(ymin-1, ymax+1),
  #        main=paste("Area Under the Curve: RAW DATA (With Negatives)",
  #                   "\n", "Blue = start  ", "  Purple = End")
  #   )
  #   
  #   # this line curve represents raw data with values for y - y_initial,
  #   # This sets the initial baseline y=0 for t=0, and adjusts each subsequent y(t) accordingly 
  #   lines(x,Raw.y_sub, lwd =1)
  #   
  #   points(x_initial,0, col="Cyan", pch=19, cex=1)
  #   points(x[length(x)],Raw.y_sub[length(Raw.y_sub)], col = "Purple", pch=19, cex=1)
  #   
  #   ## Shading of the Area inclusive in AUC (per cell) 
  #   
  #   polygon(c(shade.table$x, rev(shade.table$x)), c(shade.table$Raw.y_sub, rev(shade.table$Base0)),
  #           col = "gray70", border = NA)
  #   
  # }
  # # If the cell is unresponsive (no peaks){Skip AUC plot}
  # else{plot.new()}
  
  
  ########################
  # AUC plot: for RAW Data (Negatives = 0)
  
  # # setting ymin and ymax to define plot boundaries/ scale for the AUC plot window
  # ymin<-min(Raw.y_sub)
  # ymax<-max(Raw.y_sub)
  # # 
  # # Create Area under the Curve plot (with soon to be shaded region)
  # # This will be the second plot: presented directly below the raw data plot called first in percellwrapper:
  # # I.e the first plot was generated by calling percellwrapper, which then calls this (test) function) 
  # 
  # # making an initial plot window with all desired parameters and text  
  # if(any(sizes>thres, na.rm = TRUE)){
  #   plot(x, Raw.y_sub,
  #        cex=0.25,
  #        col="Gray",
  #        xlab = "Time (Seconds)",
  #        ylab = "Mean Grey Pixel Intensity (Arbitrary Units)",
  #        ylim = c(ymin-1, ymax+1),
  #        main=paste("Area Under the Curve: RAW DATA (Negatives = 0)",
  #                   "\n", "Blue = start  ", "  Purple = End")
  #   )
  #   
  #   # this line curve represents raw data with values for y - y_initial,
  #   # This sets the initial baseline y=0 for t=0, and adjusts each subsequent y(t) accordingly
  #   lines(x,Raw.y_sub, lwd =1)
  #   
  #   points(x_initial,0, col="Cyan", pch=19, cex=1)
  #   points(x[length(x)],Raw.y_sub[length(Raw.y_sub)], col = "Purple", pch=19, cex=1)
  #   
  #   ## Shading of the Area inclusive in AUC (per cell)
  #   
  #   
  #                             # change to last.post.troughX
  #   polygon(c(shade.table$x, rev(shade.table$x)), c(shade.table$Raw.y_sub_zero, rev(shade.table$Base0)),
  #           col = "gray70", border = NA)
  # }
  # # If the cell is unresponsive (no peaks){Skip AUC plot}
  # else{plot.new()}
  
  
  # Return all relevant parameters to be compiled into a table (by row/ biological cell)
  return(list(
    first = c(first.peakx), 
    last = c(last.peakx), 
    first.pre.troughX = c(first.pre.troughX),
    last.post.troughX = c(last.post.troughX),
    Duration = c(Duration),
    freq = c(freq),
    Duty.cycle = c(Duty.cycle),
    avg.peak.dur = c(avgpeakdur),
    Total_Image_Time=c(Total_Image_Time),
    fA = c(first.amp), 
    mA = c(max.amp), 
    numpeaks = c(numpeaks), 
    Average_Amplitude= c(average.amp),
    Norm.AUC=c(Norm.AUC)
    # ,
    # Raw.AUC.with.negs=c(Raw.AUC.with.negs),
    # Raw.AUC=c(Raw.AUC)
  ))
}

                              # heere
percellwrapper = function(cell, w = 1, span = .01, thres = .145) {
  # Raw.y<<-as.numeric(Raw.data[,cell])
  y <<- as.numeric(data[,cell])
  x <<- as.numeric(sub("^X","",rownames(data)))
  par(mfrow=c(3,1))
  plot(x,y, type="l",lwd=.75, ylim = c(min(y)-.25, max(y)+.25), xlab = "Time (Seconds)", ylab = "Fold change from baseline", main=paste("Cell",Track_ID_List[cell],"\n","\n", paste0("Tracked ",label," Data Normalized to max value")))
  abline(v=c(stimTime_string), col="blue",lwd=c(Stim_line_lwd))
  results = test(w, span, thres)
  return(results)
}

# function for AOV standard error

## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be summariezed
##   groupvars: a vector containing names of columns that contain grouping variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default is 95%)

summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
  library(plyr)
  
  # New version of length which can handle NA's: if na.rm==T, don't count them
  length2 <- function (x, na.rm=FALSE) {
    if (na.rm) sum(!is.na(x))
    else       length(x)
  }
  
  # This does the summary. For each group's data frame, return a vector with
  # N, mean, and sd
  datac <- ddply(data, groupvars, .drop=.drop,
                 .fun = function(xx, col) {
                   c(N    = length2(xx[[col]], na.rm=na.rm),
                     mean = mean   (xx[[col]], na.rm=na.rm),
                     sd   = sd     (xx[[col]], na.rm=na.rm)
                   )
                 },
                 measurevar
  )
  
  # Rename the "mean" column    
  datac <- rename(datac, c("mean" = measurevar))
  
  datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean
  
  # Confidence interval multiplier for standard error
  # Calculate t-statistic for confidence interval: 
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  ciMult <- qt(conf.interval/2 + .5, datac$N-1)
  datac$ci <- datac$se * ciMult
  
  return(datac)
}


# function: load summary data for each 
load_combine <- function(dose_specific_files, experiment_drug_dose,
                         sheet_names = c("Summary_data","Response_Distributions","Monotonic_Amplitudes")) {
  
  # One Final_summary excel WORKBOOK is generated for every position (field) for a given dose
  # Each Final_summary WORKBOOK contains three sheets of summary data  
  
  # Load all worksheets for a given dose:
  # If number of POSITION files for a given dose = 1:
  
  # OLD! 
  # if(length(dose_specific_files)==1) 
  # {  
  #   # Iterates from the first sheet to the final sheet
  #   # Loads the excel file and creates a data.frame object for the table within that sheet
  #   # The data.frame objects are defined in the global environment: object names are derived from file names
  #   
  #   for(S in 1:length(sheet_names))
  #   {
  #     # Placeholder which retrieves the current object's file name to be assigned during output
  #     # Format: experiment_drug_dose_SheetName 
  #     
  #     variableN <- paste(experiment_drug_dose, sheet_names[S],sep="_")  
  #     
  #     # Begin at sheet 1, and continue to the final sheet (final sheet = length(sheet_names))  
  #     # Import the "i"th sheet:
  #     temp_sheet <-read.xlsx(dose_specific_files, sheetIndex = S)
  #     
  #     # Assigns output as a variable with relative identification 
  #     assign(eval(substitute(variableN)), temp_sheet, envir=.GlobalEnv)                                    
  #   }
  #   rm(temp_sheet)
  #   rm(S)
  # }
  
  # # # # # # # # # # # 
  # NEW!
  
  if(length(dose_specific_files)==1) 
  {  
    # Iterates from the first sheet to the final sheet
    # Loads the excel file and creates a data.frame object for the table within that sheet
    # The data.frame objects are defined in the global environment: object names are derived from file names
    
    for(S in 1:length(sheet_names))
    {
      # Placeholder which retrieves the current object's file name to be assigned during output
      # Format: experiment_drug_dose_SheetName 
      
      # dose_specific_files<-"Final_Summary_of_CD140-DRC.Rep1_oxo_OXO;250~nM_p0.xlsx"
      
      variableN <- paste(experiment_drug_dose, sheet_names[S],sep="_")  
      
      
      sheet.table<- readWorksheetFromFile(dose_specific_files,sheet = S )
      
      pos.string.matched.to.file<-unlist(strsplit(list.files(pattern=dose_specific_files),split = "_"))[7]
      pos.string.matched.to.file<-as.numeric(gsub("p","",gsub(".xlsx","",pos.string.matched.to.file)))
      
      
      
      sheet.table.rows<-nrow(sheet.table)
      temp.data.frame<-data.frame(1:sheet.table.rows)
      names(temp.data.frame)[1]<-"Position"
      temp.data.frame$Position<-pos.string.matched.to.file
      pos.df.matched.for.sheet.table<-temp.data.frame
      
      rm(temp.data.frame)
      rm(pos.string.matched.to.file)
      
      sheet.table<-cbind(pos.df.matched.for.sheet.table,sheet.table)
      
      assign(eval(substitute(variableN)), sheet.table, envir=.GlobalEnv)  # assigns iterative result as new variable in global.env       
      
      rm(sheet.table)
      rm(pos.df.matched.for.sheet.table)
      
    }
    rm(S)
  }
  
  
  # # # # # # # # # # # 
  
  # Combination of summary data for a given dose, per position
  
  # if multiple positions (fields) were imaged in a well for a given dose, each will have its own excel summary workbook 
  # To Process cumulative data per each 
  
  if(length(dose_specific_files)>1){      
    
    # The code proceeds to combine the "i"th sheet from each position workbook corresponding to a given dose
    # and combine them into one table per dose, per experiment - defined as a global environment object, per sheet
    for(S in 1:length(sheet_names))
    {   
      variableN <- paste(experiment_drug_dose, sheet_names[S],sep = "_")
      
      # This retrieves sheet names/data for a given dose using a pattern which 
      data.list <- lapply(dose_specific_files, readWorksheetFromFile, sheet = S)
      
      # Retrieves column names from data.frames
      data.names <- lapply(data.list, names)
      
      # Creates a "results data.frame" i.e.= begin an output dataframe for combined p0 & P1 data; starts by first inputing 'position 0 data'
      result <- data.frame(Position = 0, data.list[[1]])
      
      # Creates a working variable for new column name: position designation: formats according to data in position 0 dataframe
      current.names <- c("Position", data.names[[1]])
      
      for(i in seq_along(data.list)[-1]){
        new.names <- setdiff(data.names[[i]], current.names)
        current.names <- union(current.names, new.names)
        for(nm in new.names)
          result[[ nm ]] <- NA    # Placeholder
        temp <- as.data.frame(matrix(nrow = nrow(data.list[[i]]), ncol = ncol(result)))
        names(temp) <- current.names
        
        # I added -1 here to make 'sequenced data point#2 = '1';
        # (1st & 2nd default #'s are usually '1' and '2' respectively, the line below ∆'s this, to dictate a sequence 0,1...n)
        temp[[ "Position" ]] <- i-1
        temp[ , data.names[[i]] ] <- data.list[[i]]
        result <- rbind(result, temp)                # results in an appended data.frame with position designations
        rm(temp)
        assign(eval(substitute(variableN)), result, envir=.GlobalEnv)  # assigns iterative result as new variable in global.env       
      }    
      rm(result)      #initial cleanup
    } # End Sheet Processing Loop (for "if multiple positions exist")  
  } # End loop for if multiple position files exist...
} # End Load Combine Function


#######################


   # Process all files with corresponding Timestring files (this allows only those files to be processed which have been completely analyzed in ImageJ and are prepared for processing here)


        A_Final_Response_Breakdown_Table<-data.frame()
        
        ALL.TIME.files<-list.files(pattern = "_TimeStrings")
        
        for(filez in 1:length(ALL.TIME.files)){

  
   # designate the appropriate condition, and sequential timelapse being process (mainly for output naming and cataloging purposes)
 
     file_tag <- c(unlist(strsplit(ALL.TIME.files[filez],split = "_")))
       file_tag<-paste(file_tag[1],file_tag[2],file_tag[3], sep ="_")                      
    

   # the code below chronicles (in the consol below) the name of the corresponding that is currently being processed - this is updated as the code is being run

         cat("Processing:", file_tag,"\n")

       
# Conversion of xls ####

# Optional code for conversion of any extraneous data containing 'xls' files which need to be converted to xlsx prior to analysis)  
  
all.files<-list.files(pattern = paste0("Spots in tracks statistics_",file_tag))

all.files.new<-grep("xlsx",all.files, invert = TRUE, value = TRUE)

if(length(all.files.new)>0){

  for(i in 1:length(all.files.new)){

  old.name<-all.files.new[i]
   new.name<-gsub(".txt",".xlsx",old.name)

  wb<-loadWorkbook(new.name, create=TRUE)
   createSheet(wb,name = "All Tracked Means")

  Q<-read.table(old.name, sep="\t", comment.char="",header = TRUE)
  
  writeWorksheet(wb, data=read.table(old.name, sep="\t", comment.char="",header = TRUE),sheet = "All Tracked Means")
  
  saveWorkbook(wb)  
  

  if(file.exists(old.name))
  {file.remove(old.name)}

  rm(Q)
  rm(i)
  rm(old.name)
  rm(new.name)

}
}
#######################



# Load and Organize Spot (ROI) Values ####

# Import data files and delete uneeeded columns

New_table<-readWorksheetFromFile(paste0("Spots in tracks statistics_",file_tag,".xlsx"), sheet=1,header = TRUE)
New_table<-subset(New_table, select = c("ID","TRACK_ID","MEAN_INTENSITY","MAX_INTENSITY","POSITION_X", "POSITION_Y","RADIUS"))


# Import Timestring file (post artifact removal) corresponding to the current tracked cell file being processed

TimeStrings <-read.table(paste0(file_tag,"_TimeStrings.txt"), sep="\t", comment.char="",header = TRUE)
Time_string<-TimeStrings$Formatted.Time..s.
stimTime_string<-TimeStrings$Stim_Times[!is.na(TimeStrings$Stim_Times)]

Track_ID_List<-unique(New_table$TRACK_ID)
Frame1_Spot_labels<-c()
Frame1_X<-c()
Frame1_Y<-c()
Frame1_Radius<-c()


Max_of_Mean_Intensities<-c()
Max_of_Max_Intensities<-c()

# separate two tables data derived upon import and cleanup: a table for either max values or mean values for each cell tracked across all time points
# NOTE: max value analysis is deprecated and was not used in further analyses. code pertaining to max value data can be disregarded 


Mean_table<-subset(New_table, select = -c(MAX_INTENSITY))
Max_table<-subset(New_table,select = -c(MEAN_INTENSITY))


# # # # # # #


# Organization of a table for aesthetic graphical representation of 488nm excitation timepoints as vertical lines during future processing

Stim_line_lwd<-c()

for(S in 1:length(stimTime_string)){
  
  Stim_line_lwd<-c(Stim_line_lwd,0.1)

}

rm(S)



# # # # # # # # # # #

#########################################



# Process all ROI per frame for ALL CELLS ####

# save an organized csv copy of original track spot data for all cells across all frames

Every_Cell_output<-paste0(file_tag,"_Every_Cells'_Complete_ROI_data.csv")


write.csv(New_table,Every_Cell_output,row.names = FALSE)




#############################################



# Process Tracked Mean Intensities & Frame 1 Spot Data ####


# organize information for each cell across all frames into an easily manipulatable table for furthe processing

for(i in 1:length(Track_ID_List)){
  if(i==1){
    
    first_and_last<-Mean_table[Mean_table$TRACK_ID==Track_ID_List[i],]
    
    Frame1_Spot_labels<-c(Frame1_Spot_labels,first_and_last$ID[1])
      Frame1_X<-c(Frame1_X,first_and_last$POSITION_X[1])
        Frame1_Y<-c(Frame1_Y,first_and_last$POSITION_Y[1])
          Frame1_Radius<-c(Frame1_Radius,first_and_last$RADIUS[1])
        
    cell_mean_column_label = paste0("Mean", Track_ID_List[i])
    
    first_and_last[cell_mean_column_label]<-first_and_last$MEAN_INTENSITY
    first_and_last<-subset(first_and_last,select = c(cell_mean_column_label))    
    
    Max_of_Mean_Intensities<-c(Max_of_Mean_Intensities, max(first_and_last[,1]))
    
    
    
  }
  
  if(i>1){
    
    next_iter<-Mean_table[Mean_table$TRACK_ID==Track_ID_List[i],]
    
    Frame1_Spot_labels<-c(Frame1_Spot_labels,next_iter$ID[1])
      Frame1_X<-c(Frame1_X,next_iter$POSITION_X[1])
         Frame1_Y<-c(Frame1_Y,next_iter$POSITION_Y[1])
            Frame1_Radius<-c(Frame1_Radius,next_iter$RADIUS[1])
         
    cell_mean_column_label = paste0("Mean",Track_ID_List[i])
    
    next_iter[cell_mean_column_label]<-next_iter$MEAN_INTENSITY
    next_iter<-subset(next_iter, select = c(cell_mean_column_label))
    
    first_and_last<-cbind(first_and_last,next_iter)
   
    
    
           Max_of_Mean_Intensities<-c(Max_of_Mean_Intensities, max(next_iter[,1]))
    
  }
  
}
rm(i)


# Tracked_Means_Table now contains data for all cells' mean intensity values across all time frames 
Tracked_Means_Table<-first_and_last
rm(first_and_last)
rm(Mean_table)


Frame1_Spot_labels<-as.data.frame(Frame1_Spot_labels)
  Frame1_Spot_labels$Corresp_TrackID<-Track_ID_List
    Frame1_Spot_labels$Frame1_X<-Frame1_X
      Frame1_Spot_labels$Frame1_Y<-Frame1_Y
        Frame1_Spot_labels$Radius<-Frame1_Radius

# Institute the correct time points (post artifact frame removal in ImageJ) as x values to appropriately correspond to each cells' mean intensity values
rownames(Tracked_Means_Table) <- Time_string

# Re-purpose the Tracked_Means_Table as the variable 'data' for consistency of processing within pre-defined functions
data<-Tracked_Means_Table

# Accomodation for agreeable and representative inputs to pre-existing functions 
x = as.numeric(rownames(data))


# For each cell (column) normalize raw mean grey intensity values by corresponding smooth fit value

for(u in 1:ncol(data)){
  
  y = c(data[,u])
  

  
  norm_y = smooth_Normalization(x,y,w,span)
 
  

   data[,u]<-norm_y
    


}


# Consistency for function inputs (if x has by chance been re-assigned inappropriately)              
 
x = as.numeric(rownames(data))


# For ease of "within function labeling"
label<-"Mean"


# Designated starting point for output of graphical results from soon-to-be-called functions using the name of the current file being processed 

pdf(paste0("plotted_results_of_Tracked_",label,"_",file_tag,".pdf"))


# First loess fitting of normalized mean intensity data for all cells 

r.mean = sapply((1:dim(data)[2]), function(cell) {
  
                
  percellwrapper(cell, w = 1, span = .01, thres = .145)
          
  
}, simplify="array")


dev.off()

# Organization of output data for import into ImageJ (to be used for aesthetic presentation and swift reference)

Frame1_spotInfo_name<-paste0(file_tag,"_Frame1_spotInfo.csv")
if(file.exists(Frame1_spotInfo_name))
{file.remove(Frame1_spotInfo_name)}


write.csv(Frame1_Spot_labels, Frame1_spotInfo_name)

r.mean = data.frame(t(as.matrix(r.mean)))
head(r.mean)
as.matrix(r.mean)

# This writes 'r' to a worksheet, then reads it to generate an Initial Column denoting cell number (to be edited later) 
write.csv(as.matrix(r.mean), "csv_raw_summary_of_.csv")
r.mean = read.csv("csv_raw_summary_of_.csv")
r.mean$X<-Track_ID_List

# Remove NA rows or incomplete columns (i.e. only responsive cells which have >0 peaks)
r.mean<- r.mean[complete.cases(r.mean[,(1:6)]),]

# Designate hard copy file name for data Table output:

file.name<-paste0("Final_Summary_of_",file_tag,"_Tracked_Cell_Data.xlsx")

if(file.exists(file.name))
{file.remove(file.name)}


# Response_Percent<-data.frame(nrow())


# Generate a final revised summary which only includes responsive cells)

wb<-loadWorkbook(file.name, create = TRUE) 

createSheet(wb,name = "Tracked_Means_Summary")



writeWorksheet(wb, data=r.mean,sheet="Tracked_Means_Summary", startRow = 1, startCol = 1, header = TRUE)

saveWorkbook(wb)  

rm(wb)


# Remove temporary placeholder file

if(file.exists("csv_raw_summary_of_.csv"))
{file.remove("csv_raw_summary_of_.csv")}



##########################################################



# Smooth/Normalize Averaged Responsive Cell Trace + Aesthetic Presentation ####

# IN THIS SECTION:

## Subset raw mean intensity values into a table inclusive of only those responsive cells with >3 peaks), 
  ## Calculate Average ± SEM of all responsive cells' normalized data: per timepoint
   ## Plot averaged normalized data (responsive cells) and overlay ± SEM as shaded polygon region


# placeholder variable equal to the table containing all normalized cell values over time 

clean_mean_table<-r.mean

# adding row names for ease in manipulation down the road
rownames(clean_mean_table)<-seq_along(1:nrow(clean_mean_table))

# new column representing cell number denoted by "'mean' X" 
clean_mean_table$X<-paste0("Mean",clean_mean_table$X)


# subset the table to include cells with >3 peaks, to exclude apparent non responsive cells from averaging the data in future analyses
  responsive_string<-c(clean_mean_table$X[clean_mean_table$numpeaks>3])


# Subset raw data using cell number, based on responsive cell criteria obtained and characterized above
responsive_meanNorm_Table<-subset(Tracked_Means_Table,select = c(responsive_string))
  

# re-purpose this collection of raw intensity values for all responsive cells over all timepoints as the variable 'data' for ease and consistency upon input into pre-existing function
data<-responsive_meanNorm_Table



# once again, take the table containing raw data for only those responsive cells and normalize these values to the smooth fitted curve
# the reason why this is being done again on raw data (albeit now it only includes responsive cells) is to collect the same variable outputs from base functions and to derive averaged traces with the subsetted dataset

for(u in 1:ncol(data)){
  
  y = c(data[,u])
  
  
  
  norm_y = smooth_Normalization(x,y,w,span)
  
  
  
  data[,u]<-norm_y
  
  
  
}


# re-organize data using a new variable in order to derive mean and SEM values per cell for each timepoint, then remove this placeholder values (Q)
# this is mainly instituted for graphical presentation down the line

Q<-t(data)

Mean_string<-c()
SEM_string<-c()

# Collect mean and SEM of all responsive cells AT EACH TIMEPOINT
for(z in 1:ncol(Q)){


  Mean_string<-c(Mean_string,mean(Q[,z]))
  SEM_string<-c(   SEM_string, ( (sd(Q[,z]))/(sqrt(nrow(Q))) )   )
  
}
 

# combine all mean and sem values for each cell across all timepoints into one table for easy reference

Averaged_table<-as.data.frame(Mean_string)
  names(Averaged_table)[1]<-"MeanAveragedTrace"


# incorporate appropriate dataset-specific time values into this table as well
  row.names(Averaged_table)<-Time_string


# create a table to be used in the future for shading of graphical outputs, such that values represent mean values ± SEM, and time is represented in the correct units (hours)
shade.data<-Averaged_table   
    
shade.data$X<-row.names(shade.data)  
shade.data$X<-as.numeric(shade.data$X)/3600
shade.data$SEM_below<-Averaged_table$MeanAveragedTrace - SEM_string
shade.data$SEM_above<-Averaged_table$MeanAveragedTrace + SEM_string




# # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # 
# # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # 


# a modified version of hte percellwrapper function, which will be used to smooth and find peaks within responsive cells table
# this function will also allow for the graphical outputs to be generated for averaged curves of all responsive cells in a given dataset 

modified_percellwrapper = function(cell, w = 1, span = .01, thres=.145){

  
  y <<- as.numeric(data[,cell])
  x <<- as.numeric(sub("^X","",rownames(data)))

  par(mfrow=c(3,1))  
 
  plot(x,y, type="l",lwd=.25, ylim = c(min(y)-.1, max(y)+.1), xlab = "Time (Seconds)", ylab = "Fold change from baseline", main=paste("Averaged trace of Responsive Cells","\n","\n", paste0("Tracked Means, Data Normalized to smoothed baseline")))
  # lines(x,y,lwd=.5)


  abline(v=c(stimTime_string), col="blue",lwd=c(Stim_line_lwd))
  
  results = test(w, span, thres)
    return(results)
}



# re-purpose the desired data table (in this case the average normalized response of all responive cells per each timepoint) as a function-compatible input ('data') 
data = Averaged_table


# begin PDF encoding the graphical outputs of averaged trace data

pdf(paste0(file_tag,"_Averaged_trace_with_SEM-Shading.pdf"))

# call the aforementioned function to process all responsive cells by smoothing and averaging 
modified_percellwrapper(1)

# add a column to the graphical representation based data table which will allow for shading SEM above and below the smoothed curve as well as the normalized (not yet smoothed) data
shade.data$y.smooth<-y.smooth

polygon(c(shade.data$X, rev(shade.data$X)), c(shade.data$SEM_below, rev(shade.data$y.smooth)),col = "lightskyblue",border = NA)
polygon(c(shade.data$X, rev(shade.data$X)), c(shade.data$SEM_above, rev(shade.data$y.smooth)),col = "lightskyblue",border = NA)
lines(x,y.hat,lwd=0.25,col="mediumblue")


# Calculate the percent of responsive cells in each dataset (for output purposes and reference, this will be written on each output plot for averaged traces)
percent_responders = round(100*length(responsive_string)/ncol(Tracked_Means_Table),digits = 0)


# Convert the 'x' time variable from seconds to hours (dimensional analysis: 60s/m x 60m/h = 3600s ;; xs = xh/3600s ;; x is in hours)

x<-as.numeric(x)
x<-x/3600


# create a uunique string to bypass default x axis values when creating graphs, only include whole hours

x_integers<-unique(as.integer(x))

# x-limit for x axis should be longer than final timepoint by one for ease of manipulation in adobe illustrator (easier to make figures, etc)
x_integers<-c(x_integers, x_integers[length(x_integers)]+1)

last_tick<-x_integers[length(x_integers)]


# convert the list of stimulation times at 488nm into hours to accomodate the new x-axis scale
stimTime_string_hours<- as.numeric(stimTime_string)/3600


# * Plot averaged trace of all responsive cells' mean pixel intensities normalized to smoothed fitted curve 

## for reference, So far we have accomplished the following:
    # 1) Calculated all responsive cells were identified (>3 peaks)
    # 2) Subset all tracked cells' raw data to include only these responsive cells
    # 3) Normalized all raw values per each responsive cell to their corresponding values derived from that cell's smooth fitted curve at each timepoint
    # 4) Averaged these normalized values for all responsive cells AT EACH SPECIFIC TIMEPOINT, and as such, defined the final 'mean average values' found within each 'averaged trace'
    # 5) Derived values for shading graphs with ±SEM from mean averaged values, which were derived upon calculating these 'mean average values' per each timepoint
    # 6) Drank way too much coffee 

plot(x,Averaged_table$MeanAveragedTrace, type="l",lwd=.25, 
                          ylim = c(min(y)-.1, max(y)+.1), 
                          xaxt = "n",
                          xlab = "Time (Hours)", 
                          ylab = "Fold change from baseline", 
                          main=paste("Normalized Data: Average of:", "[",length(responsive_string),"/",ncol(Tracked_Means_Table),"] = (",percent_responders,"%)", "Responsive Cells ± SEM Shading","\n","\n", paste0("Tracked Means, Data Normalized to Fitted Smooth Curve Baseline ")))

axis(1, at=0:last_tick, labels=x_integers)


# plot vertical line corresponding to stimulation (488nm excitation) timepoints

abline(v=c(stimTime_string_hours), col="blue",lwd=c(Stim_line_lwd))


# shade ± SEM from averaged smoothed normalized datapoints per each timepoint

  polygon(c(shade.data$X, rev(shade.data$X)), c(shade.data$SEM_below, rev(shade.data$MeanAveragedTrace)),col = "lightskyblue",border = NA)
  polygon(c(shade.data$X, rev(shade.data$X)), c(shade.data$SEM_above, rev(shade.data$MeanAveragedTrace)),col = "lightskyblue",border = NA)

  # make bold the averaged trace line 
      lines(x,Averaged_table$MeanAveragedTrace,lwd=0.25,col="mediumblue")

    
# finish encoding and write graphs to file as detailed about

dev.off()

# # # # # # # # 


# save an excel spreadsheet containing the values used to graphically represent averaged responsive trace

wb<-loadWorkbook(file.name, create = TRUE) 

createSheet(wb,name = "Averaged_Responsive")



writeWorksheet(wb, data=Averaged_table, sheet="Averaged_Responsive", startRow = 1, startCol = 1, header = TRUE)

saveWorkbook(wb)  


rm(wb)

# # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # 
# # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # 

# calculate response statistics and measures for the current dataset

total_n<-ncol(Tracked_Means_Table)
responsive_n<-length(responsive_string)
percent_responsive<-percent_responders
rm(percent_responders)
condition<-file_tag

# create a variable containing the data calculated above within 1 row of a placeholder table

temporary<-data.frame(condition, responsive_n, total_n,percent_responsive)


# Incorporate the present datasets' statistics and measures into a cumulative table by row concatenation with all subsequent datasets' information
# NOTE: * if the present dataset is the first dataset to be processed, this code allows concatenation of new table with a single row onto a previously defined empty table
#         * for all subsequent datasets' information, it is concatenated by row

A_Final_Response_Breakdown_Table<-rbind(A_Final_Response_Breakdown_Table,temporary)

##############################################################################

    
} # END OF ITERATION LOOP corresponding to "ALL.TIME.files", beginning with the lines directly below the end of the "set all functions" section

  
  
# SAVE A FINAL TABLE FOR RESPONSE DISTRIBUTIONS ####

# # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # 
# # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # 
  
  
# remove/overwrite any previous files written within the working directory with the same name  
    
  if(file.exists("__ALL_Response_Percents_LTTL8.xlsx"))
  {file.remove("__ALL_Response_Percents_LTTL8.xlsx")}
  

# save the table created above which contains information pertaining to the concatentation of all statistics and measures per dataset
    
wb<-loadWorkbook("__ALL_Response_Percents_LTTL8.xlsx",create = TRUE)
createSheet(wb,"Response Breakdown")
writeWorksheet(wb, data=A_Final_Response_Breakdown_Table,sheet="Response Breakdown", startRow = 1, startCol = 1, header = TRUE)

saveWorkbook(wb)

rm(wb)

# # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # 
# # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # # # # # # # # # ## # # # # # # # 

###################################################
Calcium Imaging Analysis Code

Rich Seidman

8/17/2017