Data read

library(readxl)
sweep25 <- read_excel("sweep25.xlsx", col_names = FALSE)
## New names:
## * `` -> ...1
## * `` -> ...2
print(head(sweep25))
y <- sweep25$...2

Case 1 - Spikes detection using z-score algorithm

Working principle

It is based on the principle of dispersion: if a new data-point is a given x number of standard deviations away from some moving mean, the algorithm signals (also called z-score). The algorithm is very robust because it constructs a separate moving mean and deviation, such that signals do not corrupt the threshold. Future signals are therefore identified with approximately the same accuracy, regardless of the number of previous signals. The algorithm takes 3 inputs: lag = the lag of the moving window, threshold = the z-score at which the algorithm signals and influence = the influence (between 0 and 1) of new signals on the mean and standard deviation. For example, a lag of 5 will use the last 5 observations to smooth the data. A threshold of 3.5 will signal if a data-point is 3.5 standard deviations away from the moving mean. And the influence of 0.5 gives signals half of the influence that normal data-points have. Likewise, an influence of 0 ignores signals completely for recalculating the new threshold. Influence of 0 is, therefore, the most robust option (but assumes stationary); putting the influence option at 1 is least robust. For non-stationary data, the influence option should, therefore, be put somewhere between 0 and 1.
- Let y be a vector of timeseries data of at least length lag+2 - Let mean() be a function that calculates the mean - Let std() be a function that calculates the standard deviaton - Let absolute() be the absolute value function

Settings (the ones below are examples: choose what is best for your data)

  • set lag to 6000; # lag 6000 for the smoothing functions
  • set threshold to 20; # 20 standard deviations for signal
  • set influence to 0; # between 0 and 1, here 1 is normal influence

Initialize variables

  • set signals to vector 0,…,0 of length of y; # Initialize signal results
  • set filteredY to y(1),…,y(lag) # Initialize filtered series
  • set avgFilter to null; # Initialize average filter
  • set stdFilter to null; # Initialize std. filter
  • set avgFilter(lag) to mean(y(1),…,y(lag)); # Initialize first value
  • set stdFilter(lag) to std(y(1),…,y(lag)); # Initialize first value

Pseudocode

for i=lag+1,...,t do
  if absolute(y(i) - avgFilter(i-1)) > threshold*stdFilter(i-1) then
    if y(i) > avgFilter(i-1) then
      set signals(i) to +1;                     # Positive signal
    else
      set signals(i) to -1;                     # Negative signal
    end
    # Reduce influence of signal
    set filteredY(i) to influence*y(i) + (1-influence)*filteredY(i-1);
  else
    set signals(i) to 0;                        # No signal
    set filteredY(i) to y(i);
  end
  # Adjust the filters
  set avgFilter(i) to mean(filteredY(i-lag),...,filteredY(i));
  set stdFilter(i) to std(filteredY(i-lag),...,filteredY(i));
end

Function to find spikes

ThresholdingAlgo <- function(y,lag,threshold,influence) {
  signals <- rep(0,length(y))
  filteredY <- y[0:lag]
  avgFilter <- NULL
  stdFilter <- NULL
  avgFilter[lag] <- mean(y[0:lag], na.rm=TRUE)
  stdFilter[lag] <- sd(y[0:lag], na.rm=TRUE)
  for (i in (lag+1):length(y)){
    if (abs(y[i]-avgFilter[i-1]) > threshold*stdFilter[i-1]) {
      if (y[i] > avgFilter[i-1]) {
        signals[i] <- 1;
      } else {
        signals[i] <- -1;
      }
      filteredY[i] <- influence*y[i]+(1-influence)*filteredY[i-1]
    } else {
      signals[i] <- 0
      filteredY[i] <- y[i]
    }
    avgFilter[i] <- mean(filteredY[(i-lag):i], na.rm=TRUE)
    stdFilter[i] <- sd(filteredY[(i-lag):i], na.rm=TRUE)
  }
  return(list("signals"=signals,"avgFilter"=avgFilter,"stdFilter"=stdFilter))
}

Run algo with lag = 6000, threshold = 15, influence = 1

lag       <- 6000
threshold <- 20
influence <- 1

result <- ThresholdingAlgo(y,lag,threshold,influence)

Plot result

plot(1:length(y)/1000,y,type="l",ylab="Values",xlab="Time(in seconds)", main = 'Sweep 25 Data Plot') 
lines(1:length(y)/1000,result$avgFilter,type="l",col="cyan")
lines(1:length(y)/1000,result$avgFilter+threshold*result$stdFilter,type="l",col="green")
lines(1:length(y)/1000,result$avgFilter-threshold*result$stdFilter,type="l",col="green")

Plot spikes and print the parts with spikes

plot(1:length(y)/1000,result$signals,type="S",col="red",ylab="",xlab="",ylim=c(-1.5,1.5), main = 'Sweep 25 Spike Plot')

spikes <- which(result$signals != 0)
spikes <- spikes/1000
print(spikes)
##  [1] 61.143 61.144 61.145 61.146 68.780 68.781 68.782 68.783 68.784 68.785
## [11] 68.786 68.787 68.788 77.995 77.996 77.997 77.998 77.999 78.000 78.001
## [21] 78.002 78.003 88.008 88.009 88.010 88.011 88.012 88.013 88.014 88.015
## [31] 88.016 88.017 88.018 88.019 98.014 98.015 98.016 98.017 98.018 98.019
## [41] 98.020 98.021

Find the interval between the spikes

This is done by initializing the fist element with 0 and than subtract each of the element of the spikes with it’s subsequent element.

interval <- 0
findInterval <- function(spikes) {
  interval[1] = 0
  for (i in 2:length(spikes)) {
    interval[i] <- spikes[i] - spikes[i-1]
  }
  return(interval)
}

interval <- findInterval(spikes)

Cleaning out the data where single spike is falsely detected multiple times

Giving a threshold of 10, if the interval is less than 10, the spike is termed and false and is dropped out, only the spikes with more than 10 threshold is taken.
This helps the multiple detection of the same spike to be dropped out and each spike is detected only once.

j <- 1
isi <- 0
for (i in 1:length(interval)) {
  if(interval[i] > 0.010) {
    isi[j] <- interval[i]
    j <- j+1
  }
}
print(isi)
## [1]  7.634  9.207 10.005  9.995

Average Inter Spike Interval

avgISI <- mean(isi)
print(avgISI)
## [1] 9.21025

Case 2 - Spike detection using Discrete Wavelet Trasnform(DWT)

Calculate DWT with filter=la8 and levels=15

result <- dwt(X, filter="la8", n.levels, boundary="periodic", fast=TRUE)

Plot result

plot(1:length(y)/1000,y,type="l",ylab="Values",xlab="Time(in seconds)", main = 'Sweep 25 Data Plot') 
lines(1:length(y)/1000,spikesDWT,type="l",col="cyan")
lines(1:length(y)/1000,thresholdDWT,type="l",col="green")
lines(1:length(y)/1000,intervalDWT,type="l",col="green")

Plot spikes and print the parts with spikes

plot(1:length(y)/1000,signalDWT,type="S",col="red",ylab="",xlab="",ylim=c(-1.5,1.5), main = 'Sweep 25 Spike Plot')
spikes <- which(signalDWT != 0)
spikes <- spikes/1000
print(spikes)

##  [1] 61.08767 61.08867 61.08967 61.09067 68.72467 68.72567 68.72667 68.72767
##  [9] 68.72867 68.72967 68.73067 68.73167 68.73267 77.93967 77.94067 77.94167
## [17] 77.94267 77.94367 77.94467 77.94567 77.94667 77.94767 87.95267 87.95367
## [25] 87.95467 87.95567 87.95667 87.95767 87.95867 87.95967 87.96067 87.96167
## [33] 87.96267 87.96367 97.95867 97.95967 97.96067 97.96167 97.96267 97.96367
## [41] 97.96467 97.96567

Find the interval between the spikes

This is done by initializing the fist element with 0 and than subtract each of the element of the spikes with it’s subsequent element.

interval <- 0
findInterval <- function(spikes) {
  interval[1] = 0
  for (i in 2:length(spikes)) {
    interval[i] <- spikes[i] - spikes[i-1]
  }
  return(interval)
}

interval <- findInterval(spikes)

Cleaning out the data where single spike is falsely detected multiple times

Giving a threshold of 10, if the interval is less than 10, the spike is termed and false and is dropped out, only the spikes with more than 10 threshold is taken.
This helps the multiple detection of the same spike to be dropped out and each spike is detected only once.

j <- 1
isiDWT <- 0
for (i in 1:length(interval)) {
  if(interval[i] > 0.010) {
    isiDWT[j] <- interval[i]
    j <- j+1
  }
}
print(isiDWT)
## [1]  7.633263  9.206263 10.004263  9.994263

Average Inter Spike Interval

avgISI_DWT <- mean(isiDWT)
print(avgISI_DWT)
## [1] 9.209513

Case 3 - Spike detection using S-Transform

Calculate S-Transform

result <- s-transform(X, filter="la8", n.levels, boundary="periodic", fast=TRUE)

Plot result

plot(1:length(y)/1000,y,type="l",ylab="Values",xlab="Time(in seconds)", main = 'Sweep 25 Data Plot') 
lines(1:length(y)/1000,spikesST,type="l",col="cyan")
lines(1:length(y)/1000,thresholdST,type="l",col="green")
lines(1:length(y)/1000,intervalST,type="l",col="green")

Plot spikes and print the parts with spikes

plot(1:length(y)/1000,signalST,type="S",col="red",ylab="",xlab="",ylim=c(-1.5,1.5), main = 'Sweep 25 Spike Plot')
spikes <- which(signalST != 0)
spikes <- spikes/1000
print(spikes)

##  [1] 61.08915 61.09015 61.09115 61.09215 68.72615 68.72715 68.72815 68.72915
##  [9] 68.73015 68.73115 68.73215 68.73315 68.73415 77.94115 77.94215 77.94315
## [17] 77.94415 77.94515 77.94615 77.94715 77.94815 77.94915 87.95415 87.95515
## [25] 87.95615 87.95715 87.95815 87.95915 87.96015 87.96115 87.96215 87.96315
## [33] 87.96415 87.96515 97.96015 97.96115 97.96215 97.96315 97.96415 97.96515
## [41] 97.96615 97.96715

Find the interval between the spikes

This is done by initializing the fist element with 0 and than subtract each of the element of the spikes with it’s subsequent element.

interval <- 0
findInterval <- function(spikes) {
  interval[1] = 0
  for (i in 2:length(spikes)) {
    interval[i] <- spikes[i] - spikes[i-1]
  }
  return(interval)
}

interval <- findInterval(spikes)

Cleaning out the data where single spike is falsely detected multiple times

Giving a threshold of 10, if the interval is less than 10, the spike is termed and false and is dropped out, only the spikes with more than 10 threshold is taken.
This helps the multiple detection of the same spike to be dropped out and each spike is detected only once.

j <- 1
isiST <- 0
for (i in 1:length(interval)) {
  if(interval[i] > 0.010) {
    isiST[j] <- interval[i]
    j <- j+1
  }
}
print(isiST)
## [1]  7.633414  9.206414 10.004414  9.994414

Average Inter Spike Interval

avgISI_ST <- mean(isiST)
print(avgISI_ST)
## [1] 9.209664

Result

Inter-spike Interval using different methods.

Inter-spike Interval using different methods.