Relationship between types of water and types of surfactants

This report documents the workflow used to evaluate how frequently scientific studies combined each type of water with a specific surfactant.

Packages

# Load required packages
library(ComplexHeatmap)
library(dendextend)
library(circlize)
library(RColorBrewer)
library(grid)
library(openxlsx)
library(kableExtra)
library(dplyr)
library(rmarkdown)
library(DT)

Data collection

The qualitative data observed are included in supplementary file 6, more specifically in the columns “Type of liquid” and “Type of compound with surfactant properties”.

# Observed data
#
# Frequency table of water type x surfactant combinations.
#   - Each column is one surfactant (30 columns).
#   - Each row is one water type (18 rows); the labels are supplied through
#     `row.names` at the end of the call, in the same order as the values
#     inside every column vector.
#   - Each cell is the number of studies that combined that water type with
#     that surfactant (0 = combination not observed).
# Column names use underscores instead of spaces/hyphens so they are
# syntactically valid R names (e.g. Tween_80, Triton_X100, PMA_Na).
data <- data.frame(
  SDS = c(3, 4, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),
  SLS = c(1, 3, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1),
  Tween_80 = c(1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  Triton_X100 = c(0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0),
  Tween_20 = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  Oleyl_Alcohol = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  LPS = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  Amine = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  Ester = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  HTAC = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  NP_10 = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  MAPK = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  HTABr = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0),
  OTABr = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0),
  TDTABr = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0),
  Hitenol_BC = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  LDBAB = c(0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  EO_10 = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  PF68 = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  PMA_Na = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  DOSS = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  FAPEA = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  STS = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0),
  CTAB = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0),
  CTAC = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0),
  PAIC_Citron = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0),
  GMC = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0),
  SDBS = c(0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  RHA = c(0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  SAP = c(0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  # Water-type labels, one per position of the column vectors above.
  row.names = c("Water", "Tap water", "Filtered tap water", "Sea water", "Unpolluted water (comparator)", "Purified water", "Deionized water", "Distilled water", "Clean water", "Clean salt water", "Filtered/sterilized groundwater", "Pure water", "Ultrapure water", "Double distilled and deionized water", "Degassed water", "Demineralized water", "Double distilled water", "NaCl aqueous solution")
)

Data table

# Show the frequency table as an interactive, horizontally scrollable widget.
# The calls are nested inside-out: build the widget, embolden the row-name
# column, then centre every data column.
DT::formatStyle(
  DT::formatStyle(
    DT::datatable(
      data,
      rownames = TRUE,
      extensions = "FixedColumns",
      options = list(
        pageLength = 10,
        scrollX = TRUE,
        # Keep the leftmost column (the water-type labels) in place while
        # the surfactant columns scroll sideways.
        fixedColumns = list(leftColumns = 1)
      )
    ),
    # Column index 0 targets the row-name column when rownames = TRUE.
    columns = 0,
    fontWeight = "bold"
  ),
  columns = colnames(data),
  fontWeight = "normal",
  textAlign = "center"
)

LEGEND

SDS: Sodium Dodecyl Sulfate
SLS: Sodium Lauryl Sulfate
OA: Oleyl Alcohol
LPS: Biosurfactant Lipopolysaccharide
HTAC: Hexadecyltrimethylammonium Chloride
NP 10: Nonylphenol Ethoxylate
MAPK: Potassium Monolauryl Phosphate
HTABr: Hexadecyltrimethylammonium Bromide
OTABr: Octadecyltrimethylammonium Bromide
TDTABr: Tetradecyltrimethylammonium Bromide
BC: Hitenol
LDBAB: Lauryl Dimethyl Benzyl Ammonium Bromide
10 EO: Ethylene Oxide
PF68: Pluronic F68
PMA-Na: Sodium Polymethacrylate
DOSS: Diisooctyl Sulfosuccinate
FAPEA: Fatty Acid Polyethanolamide
STS: Sodium Tetradecyl Sulfate
CTAB: Cetrimonium Bromide
CTAC: Cetyltrimethylammonium Chloride
GMC: Glycerol Monocaprylate
SDBS: Sodium Dodecyl Benzene Sulfonate
RHA: Rhamnolipid Biosurfactant
SAP: Saponin Biosurfactant

Dendrograms and heat map

The row dendrogram groups the types of water into clusters, and the column dendrogram groups the types of surfactants. The heat map shows the frequency of studies that combined each type of water with each type of surfactant.

# Hierarchical clustering and heat map of the water type x surfactant
# frequency table (`data`: rows = water types, columns = surfactants).

# Apply the common visual style (line width, label size, node markers,
# branch colour) to a dendrogram so the row and column trees look identical.
#
# dend: a "dendrogram" object.
# Returns the same dendrogram with the styling attributes set.
style_dendrogram <- function(dend) {
  dend %>%
    dendextend::set("branches_lwd", 2) %>%
    dendextend::set("labels_cex", 0.6) %>%
    dendextend::set("nodes_pch", 19) %>%
    dendextend::set("nodes_cex", 0.5) %>%
    dendextend::set("branches_col", "darkblue")
}

# Row dendrogram: clusters the water types (rows of `data`) using the
# defaults of dist() and hclust().
dend_dependents <- style_dendrogram(as.dendrogram(hclust(dist(data))))

# Column dendrogram: clusters the surfactants, hence the transpose.
dend_independents <- style_dendrogram(as.dendrogram(hclust(dist(t(data)))))

# Two-point colour scale: white at the smallest observed frequency,
# dark blue at the largest; values in between are interpolated.
col_fun <- colorRamp2(
  c(min(data, na.rm = TRUE), max(data, na.rm = TRUE)),
  c("#FFFFFF", "#004080")
)

# Build the heat map, reusing the pre-computed dendrograms for ordering.
heatmap_plot <- Heatmap(
  as.matrix(data),
  name = "Value",
  col = col_fun,
  cluster_rows = dend_dependents,
  cluster_columns = dend_independents,
  row_names_gp = gpar(fontsize = 8),
  # Fix: rotation is not a grid graphical parameter, so `rot` inside gpar()
  # was silently ignored. ComplexHeatmap takes it via `column_names_rot`.
  column_names_gp = gpar(fontsize = 8),
  column_names_rot = 90,
  row_title = "Water Type",
  column_title = "Surfactant Type",
  row_title_gp = gpar(fontsize = 16, fontface = "bold"),
  column_title_gp = gpar(fontsize = 16, fontface = "bold"),
  heatmap_legend_param = list(
    # This title is what the legend shows; it takes precedence over `name`.
    title = "Frequency",
    labels_gp = gpar(fontsize = 10),
    title_gp = gpar(fontsize = 12, fontface = "bold"),
    legend_height = unit(4, "cm"),
    legend_direction = "vertical"
  ),
  row_dend_width = unit(2, "cm"),
  column_dend_height = unit(2, "cm"),
  # Redraw every cell with its mapped fill and a gray border so the grid
  # stays visible even where most cells are white (frequency 0).
  cell_fun = function(j, i, x, y, width, height, fill) {
    grid.rect(
      x = x, y = y, width = width, height = height,
      gp = gpar(fill = fill, col = "gray")
    )
  },
  # Gaps apply between heat-map slices; no split is requested here.
  row_gap = unit(2, "mm"),
  column_gap = unit(2, "mm"),
  # Generous limits so long water-type and surfactant labels are not cut off.
  row_names_max_width = unit(10, "cm"),
  column_names_max_height = unit(10, "cm")
)

# Display heatmap with the legend on the right-hand side
draw(heatmap_plot, heatmap_legend_side = "right")