Instructions:

  1. If needed, install all libraries required.
  2. Put the source file in the same file as the data file
  3. Source file should be CSV format. lines are intensities and columns for replicates.
  4. Cange the variable filename to the file name without the extension CSV.
  5. Adjust the percentage variable to whatever you want.
  6. RUN!

source code available to download here.

Table of contents

  1. Statistics
  2. Plots
  3. More statistics
library(readxl)
library(plotly)
library(tidyverse)
library(plyr)
library(dplyr)
library(cluster)
library(reshape2)
library(tibble)
library(formattable)
#  Mode function
getmode <- function(v) {
   uniqv <- unique(v)
   uniqv[which.max(tabulate(match(v, uniqv)))]
}

file

filename = "data"
data = read.csv(paste0(filename,".csv"))
data = as.data.frame(t(data))
# `colnames<-`(data, data[1,]) # Use column names
data = data[2:min(dim(data)),] # Delete columns names row

cutoff

percentage = 20
all.cells = apply(data, 2, sum)
cum.cells = cumsum(all.cells)
threshold = min(which(cum.cells > sum(data) * percentage / 100 ))
#  Cut intensity below thresholds
trim.data = data[,threshold:length(data)]
max.threshold = min(which(apply(trim.data, 2, max) == 0))
double.trim = trim.data[,1:max.threshold]

Remove lines with peak cells in low intensity

peaks = apply(data, 1, max)
peak.intensity = numeric(length = length(data$V1))
for(row in seq(1:length(data$V1))) {
  peak.intensity[row] = which(data[row,] == peaks[row])
}
tripl.trim = double.trim[apply(double.trim, 1, max) >= peaks ,]
# plot(apply(double.trim, 2, mean))
# clustered = cbind(pam.cluster$clustering, double.trim[,1:3])
temp = rownames_to_column(as.data.frame(tripl.trim))
# temp$rowname = paste0(temp$rowname, "_", fan$clustering)
# colnames(melted)
melted = melt(temp)
Using rowname as id variables
colnames(melted) = c("replicate", "intensity", "frequency")
# melted = separate(melted, rowname, c("rowname", "cluster"), "_")
# gather(temp, id = rowname, convert = T)
# melted = melt(clustered, value.name = list(colnames(clustered)[2:length(colnames(clustered))]))
# plot_ly(melted, type = "scatter", mode = "markers", y = ~value, color = ~Var1)

Show summary statistics

tabl = as.data.frame(apply(tripl.trim, 1, summary))
tabl
write.csv(tabl, file = paste0("summary for table ", filename,".csv"))

Histogram

p = ggplot(melted, aes(x = intensity, y = frequency, color = replicate)) + 
    geom_point(alpha = 0.05, shape = 16) +
    theme_classic() 
    # labs(y = "Frequency", x = "Intensity", color = "Replicate")
p
ggsave(file = paste0("hist_", filename,".png"), plot =  p)
Saving 7.29 x 4.5 in image

Violin plot

p2 = ggplot(melted, aes(x = replicate, y = frequency, color = replicate)) + 
  geom_violin(aes(fill = replicate)) + 
  theme_classic()
p2
ggsave(file = paste0("violin_", filename,".png"), plot =  p2)
Saving 7.29 x 4.5 in image

Boxplot

# means <- aggregate(frequency ~  replicate, melted, mean)
p3 = ggplot(melted, aes(x = replicate, y = frequency, fill = replicate)) + 
  geom_boxplot() + 
  stat_summary(fun.y = mean, color = "black", geom = "point", show.legend = F) +
  # geom_text(data = means, aes(label = frequency, y = frequency + 0.08)) +
  theme_classic()
p3
ggsave(file = paste0("boxplot_", filename,".png"), plot =  p3)
Saving 7.29 x 4.5 in image

Density plot

# Adjust the alpha uf necessary
p4 = ggplot(melted, aes(x = replicate, y = frequency, color = replicate)) + 
  geom_jitter(height = 0, width = 0.45, alpha = 0.03) +
  theme_classic()
p4
ggsave(file = paste0("density_", filename,".png"), plot =  p4)
Saving 7.29 x 4.5 in image

T tests p values all pairs

# t.tripl = t(tripl.trim)
# combos <- combn(ncol(t.tripl),2)
# 
# adply(combos, 2, function(x) {
#   test <- t.test(t.tripl[, x[1]], t.tripl[, x[2]])
# 
#   out <- data.frame("var1" = colnames(t.tripl)[x[1]]
#                     , "var2" = colnames(t.tripl[x[2]])
#                     , "t.value" = sprintf("%.3f", test$statistic)
#                     ,  "df"= test$parameter
#                     ,  "p.value" = sprintf("%.3f", test$p.value)
#                     )
#   return(out)
# 
# })
# 
ttests = pairwise.t.test(melted$frequency, melted$replicate, p.adjust = "none")
ttests$method
[1] "t tests with pooled SD"
as.data.frame(ttests$p.value)
write.csv(ttests$p.value, file = paste0("ttests_", filename, ".csv"))
# tttable = as.data.frame(formatC(ttests$p.value, format = "e", digits = 2))
tttable = as.data.frame(ttests$p.value)
formattable(tttable, align = c("l",rep("r", NCOL(tttable) - 1)), 
            list(`Indicator Name` = formatter("span", style = ~ style(color = "grey",font.weight = "bold")), 
                 area(col = 1:9) ~ function(x) percent(x / 100, digits = 0),
                 area(col = 1:9) ~ color_tile("green", "red")))
---
title: "FACS"
author: "Rotem Hadar"
date: "7th Kislev 5779"
output: html_notebook
---

## Instructions:
0. If needed, install all libraries required.
1. Put the source file in the same file as the data file
2. Source file should be `CSV` format. lines are intensities and columns for replicates.
3. Cange the variable [`filename`](#file) to the file name without the extension `CSV`.
4. Adjust the [`percentage`](#cutoff) variable to whatever you want.
5. RUN!

source code available to download [here](https://bitbucket.org/benzzz/facs/src/master/facs.Rmd).

### Table of contents
1. [Statistics](#statistics)
2. [Plots](#plot)
3. [More statistics](#more)


```{r message=FALSE}
library(readxl)
library(plotly)
library(tidyverse)
library(plyr)
library(dplyr)
library(cluster)
library(reshape2)
library(tibble)
library(formattable)
```


```{r getMode}
#  Mode function
getmode <- function(v) {
   uniqv <- unique(v)
   uniqv[which.max(tabulate(match(v, uniqv)))]
}
```

#### file
```{r file}
filename = "data"
data = read.csv(paste0(filename,".csv"))
data = as.data.frame(t(data))
# `colnames<-`(data, data[1,]) # Use column names
data = data[2:min(dim(data)),] # Delete columns names row
```
---
Not important, just cool
```{r Sword}
plot(diff(apply(data,2, mean))) # Sword graph
```

---
#### cutoff
```{r cutoff}
percentage = 20
all.cells = apply(data, 2, sum)
cum.cells = cumsum(all.cells)
threshold = min(which(cum.cells > sum(data) * percentage / 100 ))

#  Cut intensity below thresholds
trim.data = data[,threshold:length(data)]
max.threshold = min(which(apply(trim.data, 2, max) == 0))
double.trim = trim.data[,1:max.threshold]
```

Remove lines with peak cells in low intensity
```{r}
peaks = apply(data, 1, max)
peak.intensity = numeric(length = length(data$V1))
for(row in seq(1:length(data$V1))) {
  peak.intensity[row] = which(data[row,] == peaks[row])
}

tripl.trim = double.trim[apply(double.trim, 1, max) >= peaks ,]
# plot(apply(double.trim, 2, mean))
```

```{r eval=FALSE, include=FALSE}

clusth = hclust(dist(trim.data))
km = kmeans(double.trim, centers = 8)
fan = cluster::fanny(double.trim, 7)
fan$clustering
dais = cluster::daisy(double.trim)
 clusth$height

sil_width <- c(NA)
for(i in 2:15){
  pam_fit <- pam(  double.trim
                 , diss = TRUE
                 , k = i)
  
  sil_width[i] <- pam_fit$silinfo$avg.width
  
}

# Plot sihouette width (higher is better)

plot(1:15, sil_width,
     xlab = "Number of clusters",
     ylab = "Silhouette Width")
lines(1:15, sil_width)

K = which(sil_width == max(sil_width, na.rm = T))
pam.cluster = pam(double.trim, diss = T, k = K)
pam.cluster$clustering
```

```{r}
# clustered = cbind(pam.cluster$clustering, double.trim[,1:3])
temp = rownames_to_column(as.data.frame(tripl.trim))
# temp$rowname = paste0(temp$rowname, "_", fan$clustering)
# colnames(melted)

melted = melt(temp)
colnames(melted) = c("replicate", "intensity", "frequency")
# melted = separate(melted, rowname, c("rowname", "cluster"), "_")

# gather(temp, id = rowname, convert = T)
# melted = melt(clustered, value.name = list(colnames(clustered)[2:length(colnames(clustered))]))

# plot_ly(melted, type = "scatter", mode = "markers", y = ~value, color = ~Var1)
```

---
### Show summary statistics {#statistics}
```{r statistics}
tabl = as.data.frame(apply(tripl.trim, 1, summary))
tabl
write.csv(tabl, file = paste0("summary for table ", filename,".csv"))
```

### Histogram {#plot}
```{r plot}
p = ggplot(melted, aes(x = intensity, y = frequency, color = replicate)) + 
    geom_point(alpha = 0.05, shape = 16) +
    theme_classic() 
    # labs(y = "Frequency", x = "Intensity", color = "Replicate")
p
ggsave(file = paste0("hist_", filename,".png"), plot =  p)
```

### Violin plot
```{r violin}
p2 = ggplot(melted, aes(x = replicate, y = frequency, color = replicate)) + 
  geom_violin(aes(fill = replicate)) + 
  theme_classic()
p2
ggsave(file = paste0("violin_", filename,".png"), plot =  p2)
```

### Boxplot
```{r}
# means <- aggregate(frequency ~  replicate, melted, mean)
p3 = ggplot(melted, aes(x = replicate, y = frequency, fill = replicate)) + 
  geom_boxplot() + 
  stat_summary(fun.y = mean, color = "black", geom = "point", show.legend = F) +
  # geom_text(data = means, aes(label = frequency, y = frequency + 0.08)) +
  theme_classic()
p3
ggsave(file = paste0("boxplot_", filename,".png"), plot =  p3)
```

### Density plot
```{r}
# Adjust the alpha uf necessary
p4 = ggplot(melted, aes(x = replicate, y = frequency, color = replicate)) + 
  geom_jitter(height = 0, width = 0.45, alpha = 0.03) +
  theme_classic()
p4
ggsave(file = paste0("density_", filename,".png"), plot =  p4)
```

### T tests p values all pairs {#more}
```{r more}
# t.tripl = t(tripl.trim)
# combos <- combn(ncol(t.tripl),2)
# 
# adply(combos, 2, function(x) {
#   test <- t.test(t.tripl[, x[1]], t.tripl[, x[2]])
# 
#   out <- data.frame("var1" = colnames(t.tripl)[x[1]]
#                     , "var2" = colnames(t.tripl[x[2]])
#                     , "t.value" = sprintf("%.3f", test$statistic)
#                     ,  "df"= test$parameter
#                     ,  "p.value" = sprintf("%.3f", test$p.value)
#                     )
#   return(out)
# 
# })
# 

ttests = pairwise.t.test(melted$frequency, melted$replicate, p.adjust = "none")
ttests$method
as.data.frame(ttests$p.value)

write.csv(ttests$p.value, file = paste0("ttests_", filename, ".csv"))

# tttable = as.data.frame(formatC(ttests$p.value, format = "e", digits = 2))
tttable = as.data.frame(ttests$p.value)

formattable(tttable, align = c("l",rep("r", NCOL(tttable) - 1)), 
            list(`Indicator Name` = formatter("span", style = ~ style(color = "grey",font.weight = "bold")), 
                 area(col = 1:9) ~ function(x) percent(x / 100, digits = 0),
                 area(col = 1:9) ~ color_tile("green", "red")))



```

