Relationship between dependent and independent variables

This report documents the script used to assess how frequently scientific studies evaluate the influence of explanatory (independent) variables on response (dependent) variables.

Packages

# Load required packages
library(ComplexHeatmap)
library(dendextend)
library(circlize)
library(RColorBrewer)
library(grid)
library(DT)

Data collection

The observed qualitative data are provided in Supplementary File 6, specifically in the columns “Dependent variables (effects)” and “Independent variables (causes)”.

# Build the frequency table as a data frame.
# Rows are the dependent (response) variables and columns are the independent
# (explanatory) variables; each cell holds the frequency with which the two
# were combined in the reviewed studies.
#
# The English labels are attached directly to their values instead of being
# assigned by position afterwards, so a label can never drift away from its
# column. check.names = FALSE keeps the labels exactly as written (including
# spaces) rather than letting data.frame() rewrite them into syntactic names.
data <- data.frame(
  "Surfactant Concentration" = c(22, 16, 1, 5, 5, 4),
  "Hydrodynamics" = c(17, 11, 1, 5, 3, 4),
  "Liquid Temperature" = c(8, 4, 1, 3, 2, 1),
  "Surfactant Type" = c(9, 5, 0, 3, 1, 3),
  "Surface Tension" = c(3, 4, 0, 1, 1, 0),
  "pH" = c(2, 1, 0, 0, 0, 0),
  "Air Temperature" = c(0, 1, 1, 0, 0, 0),
  "Air Pressure" = c(0, 1, 0, 0, 0, 0),
  "Viscosity" = c(1, 2, 1, 2, 0, 1),
  "Reynolds number" = c(2, 0, 1, 0, 1, 0),
  row.names = c(
    "kL", "KLa", "k2", "Oxygen transfer rate", "Diffusivity",
    "Surface tension"
  ),
  check.names = FALSE
)

Data table

# Show the frequency table as an interactive widget.
# The table scrolls horizontally with the FixedColumns extension keeping the
# leftmost column in place; column index 0 (the row names, since
# rownames = TRUE) is rendered bold and every data column is centred.
table_options <- list(
  pageLength = 10,
  scrollX = TRUE,
  fixedColumns = list(leftColumns = 1)
)

table_widget <- DT::datatable(
  data,
  rownames = TRUE,
  extensions = "FixedColumns",
  options = table_options
)

# Emphasise the row labels.
table_widget <- formatStyle(table_widget, columns = 0, fontWeight = "bold")

# Regular weight, centred text for all value columns.
table_widget <- formatStyle(
  table_widget,
  columns = colnames(data),
  fontWeight = "normal",
  textAlign = "center"
)

# Auto-print the finished widget.
table_widget

Dendrograms and heatmaps

The row dendrogram groups the dependent variables into clusters, and the column dendrogram groups the independent variables into clusters. The heatmap shows the frequency of studies that combined each type of response variable with a given explanatory variable.

# Apply one shared look to a dendrogram: thick dark-blue branches, small
# labels, and small filled-circle node markers. Returns the styled dendrogram.
style_dendrogram <- function(dend) {
  dend <- set(dend, "branches_lwd", 2)
  dend <- set(dend, "labels_cex", 0.6)
  dend <- set(dend, "nodes_pch", 19)
  dend <- set(dend, "nodes_cex", 0.5)
  set(dend, "branches_col", "darkblue")
}

# Hierarchical clustering of the rows (dependent variables) using the default
# dist() and hclust() settings.
row_clustering <- hclust(dist(data))
dend_dependent <- style_dendrogram(as.dendrogram(row_clustering))

# Same clustering on the transposed table, i.e. the columns (independent
# variables).
column_clustering <- hclust(dist(t(data)))
dend_independent <- style_dendrogram(as.dendrogram(column_clustering))

# Colour mapping for the heatmap cells: a linear gradient running from white
# at the smallest value in the table to dark blue at the largest.
col_fun <- colorRamp2(
  breaks = range(as.matrix(data)),
  colors = c("#FFFFFF", "#004080")
)

# Generate the clustered heatmap of study frequencies and draw it with the
# legend on the right-hand side.
# Rows (dependent variables) and columns (independent variables) are ordered
# by the pre-styled dendrograms `dend_dependent` and `dend_independent`, and
# cells are coloured with `col_fun`.
Heatmap(
  as.matrix(data),
  name = "Frequency",
  col = col_fun,
  cluster_rows = dend_dependent,
  cluster_columns = dend_independent,
  row_names_gp = gpar(fontsize = 10),
  # Label rotation is a Heatmap() argument, not a graphical parameter:
  # a `rot` entry inside gpar() is silently ignored.
  column_names_gp = gpar(fontsize = 10),
  column_names_rot = 90,
  row_title = "Dependent Variables",
  column_title = "Independent Variables",
  row_title_gp = gpar(fontsize = 14, fontface = "bold"),
  column_title_gp = gpar(fontsize = 14, fontface = "bold"),
  heatmap_legend_param = list(
    title = "Frequency",
    labels_gp = gpar(fontsize = 10),
    title_gp = gpar(fontsize = 12, fontface = "bold"),
    legend_height = unit(4, "cm"),
    legend_direction = "vertical"
  ),
  row_dend_width = unit(2, "cm"),
  column_dend_height = unit(2, "cm"),
  # Custom cell renderer: `i` is used as the row index and `j` as the column
  # index into `data`; `x`, `y`, `width`, `height` and `fill` describe the
  # cell being drawn.
  cell_fun = function(j, i, x, y, width, height, fill) {
    # Repaint the cell with its fill colour and a gray border.
    grid.rect(
      x = x, y = y, width = width, height = height,
      gp = gpar(fill = fill, col = "gray")
    )
    # Print the frequency as a whole number centred in the cell.
    grid.text(
      sprintf("%.0f", data[i, j]),
      x, y,
      gp = gpar(fontsize = 10, col = "black")
    )
  },
  row_gap = unit(2, "mm"),
  column_gap = unit(2, "mm"),
  # Generous limits so that long variable names are not truncated.
  row_names_max_width = unit(10, "cm"),
  column_names_max_height = unit(10, "cm")
) |>
  draw(heatmap_legend_side = "right")