library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
serum <- "./Serum_metadata.txt" |> read_tsv() |> mutate(source = "serum") |> 
  mutate(  Dysbiosis_Index = Dysbiosis_Index |> as.numeric()
         , FCP_Absolute = FCP_Absolute |> as.numeric())
## Rows: 206 Columns: 43
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (34): SampleID, Cohort, pn_ID, Gender, Smoking, DOB, Date, Fasting, Dx, ...
## dbl  (9): Age, Visit, Days_since_first_serum, Months_since_first_serum, CRP_...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
stool <- "./Stool_metadata.txt" |> read_tsv() |> mutate(source = "stool") |> 
  mutate(Dysbiosis_Index = Dysbiosis_Index |> as.numeric())
## Rows: 312 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (27): SampleID, Cohort, pn_ID, Gender, DOB, Smoking, Date, Visit_origina...
## dbl  (9): Age, Visit, CRP_Absolute, FCP_Absolute, Total_LS, Highest_LS, CDAI...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

Couples

input <- 
"Faith + dysbiosis index
Faith + FCP_Absolute
Faith + CRP_Absolute
Dysbiosis index + FCP absolute
Dysbiosis index + CRP absolute
FCP absolute + CRP absolute"

#  AAAAAAAAHHHHHHHH!
input <- input |> str_replace_all("dysbiosis index", "Dysbiosis_Index")
input <- input |> str_replace_all("Dysbiosis index", "Dysbiosis_Index")
input <- input |> str_replace_all("Faith", "Faith_pd")
input <- input |> str_replace_all("FCP a", "FCP_A")
input <- input |> str_replace_all("CRP a", "CRP_A")
couples <- read.table(sep = '+', text = input)
couples$V1 <- couples$V1 |> trimws()
couples$V2 <- couples$V2 |> trimws()

Correlation - complicated

  1. לכל זוג צריך לבדוק את קורלציית ספירמן

  2. ואז הצבעים בהיטמאפ יהיה ה-R,

  3. ואם זה מובהק אז צריך להוסיף את ה- PVAL לריבוע.

couples$cor <- NA
couples$pval <- NA
for(row in couples |> nrow() |> seq()){ # Shame on me
  v1 <- couples[row,1] ; v2 <- couples[row,2] 
  # 1
  couples[row, "cor"] <- serum |> 
    select(SampleID, Faith_pd, FCP_Absolute, CRP_Absolute, Dysbiosis_Index) |> drop_na() |>
  summarise(cor(!!sym(v1), !!sym(v2), method = "spearman"))
  # 3
  couples[row, "pval"] <- serum |> 
    select(SampleID, Faith_pd, FCP_Absolute, CRP_Absolute, Dysbiosis_Index) |> drop_na() |>
  summarise(cor.test(x = !!sym(v1),y =  !!sym(v2), method = "spearman")$p.value)
}
## Warning in cor.test.default(x = Faith_pd, y = FCP_Absolute, method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(x = Faith_pd, y = CRP_Absolute, method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(x = Dysbiosis_Index, y = FCP_Absolute, method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(x = Dysbiosis_Index, y = CRP_Absolute, method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(x = FCP_Absolute, y = CRP_Absolute, method =
## "spearman"): Cannot compute exact p-value with ties
couples |>
  ggplot(aes(x = V1, y = V2, fill = cor)) + # 2
                    geom_tile() +
                    geom_text(aes(label = round(pval, 4))) + 
  theme_classic() + scale_fill_gradient2(low = "red",  high = "blue", mid = "white") + 
  theme(axis.title = element_blank()) + ggtitle("Corplot of some measurments (serum metadata)")

Correlation - easy

Serum

stool

# vals <- serum |>
vals <- stool |>
  select(Faith_pd, FCP_Absolute, CRP_Absolute, Dysbiosis_Index) |> 
  drop_na()

m <- cor(vals, method = "spearman")
peas <- corrplot::cor.mtest(vals)
corrplot::corrplot(m ,p.mat = peas$p
                   , type = "lower"
                   # , sig.level = 0.05
                   # , sig.level = 1
                   , order = 'hclust'
                   # , insig = 'label_sig', sig.level = c(.000001,.0001, 0.001, 0.01, 0.05)
                   , addrect = 2
                   , number.cex = .5
                   , addCoef.col ='black'
                   , tl.col = 'black'
                   # , tl.pos = 'd'
                   , diag=F)

vals |> nrow()
## [1] 196
m 
##                   Faith_pd FCP_Absolute CRP_Absolute Dysbiosis_Index
## Faith_pd         1.0000000   -0.1666612   -0.1101604      -0.2411797
## FCP_Absolute    -0.1666612    1.0000000    0.1601418       0.2242859
## CRP_Absolute    -0.1101604    0.1601418    1.0000000       0.1323307
## Dysbiosis_Index -0.2411797    0.2242859    0.1323307       1.0000000
peas$p
##                    Faith_pd FCP_Absolute CRP_Absolute Dysbiosis_Index
## Faith_pd        0.00000e+00 1.158700e-03 2.054160e-02    1.669500e-09
## FCP_Absolute    1.15870e-03 0.000000e+00 2.949338e-06    2.062564e-02
## CRP_Absolute    2.05416e-02 2.949338e-06 0.000000e+00    6.897010e-04
## Dysbiosis_Index 1.66950e-09 2.062564e-02 6.897010e-04    0.000000e+00

Only disease

# vals <- serum |>
vals <- stool |>
  filter(Dx == "CD") |> 
  select(Faith_pd, FCP_Absolute, CRP_Absolute, Dysbiosis_Index) |> 
  drop_na()

m <- cor(vals, method = "spearman")
peas <- corrplot::cor.mtest(vals)
corrplot::corrplot(m ,p.mat = peas$p
                   , type = "lower"
                   # , sig.level = 0.05
                   # , sig.level = 1
                   , order = 'hclust'
                   # , insig = 'label_sig', sig.level = c(.000001,.0001, 0.001, 0.01, 0.05)
                   , addrect = 2
                   , number.cex = .5
                   , addCoef.col ='black'
                   , tl.col = 'black'
                   # , tl.pos = 'd'
                   , diag=F)

vals |> nrow()
## [1] 196
m 
##                   Faith_pd FCP_Absolute CRP_Absolute Dysbiosis_Index
## Faith_pd         1.0000000   -0.1666612   -0.1101604      -0.2411797
## FCP_Absolute    -0.1666612    1.0000000    0.1601418       0.2242859
## CRP_Absolute    -0.1101604    0.1601418    1.0000000       0.1323307
## Dysbiosis_Index -0.2411797    0.2242859    0.1323307       1.0000000
peas$p
##                    Faith_pd FCP_Absolute CRP_Absolute Dysbiosis_Index
## Faith_pd        0.00000e+00 1.158700e-03 2.054160e-02    1.669500e-09
## FCP_Absolute    1.15870e-03 0.000000e+00 2.949338e-06    2.062564e-02
## CRP_Absolute    2.05416e-02 2.949338e-06 0.000000e+00    6.897010e-04
## Dysbiosis_Index 1.66950e-09 2.062564e-02 6.897010e-04    0.000000e+00

* P<0.05

** P< 0.01

*** P<0.001

**** P<0.0001

***** P<0.00001

Barplot

Serum

בנוסף, אם יש לך כוח,

צריך גם לעשות בר-פלוט שמשווה בין

כאשר ציר X הוא ה- disease status

וציר y הוא faith פעם אחת,

ופעם נוספת הוא dysibiosis index.

כמובן שצריך גם כאן לעשות מבחן סטטיסטי…

mycompr <- list(c("CD_active", "Control"), 
                c("CD_Remission", "CD_active"), 
                c("CD_Remission", "Control"))

library(ggsignif)
serum |> select(SampleID, Disease_Status, Faith_pd, Dysbiosis_Index) |> 
  ggplot(aes(x = Disease_Status, y = Faith_pd)) + 
  geom_boxplot() +
  geom_jitter(color = "grey", alpha = .7) +
  geom_signif(comparisons = mycompr, step_increase = .3) +
  theme_classic()
## Warning: Removed 70 rows containing non-finite values (stat_boxplot).
## Warning: Removed 70 rows containing non-finite values (stat_signif).
## Warning: Removed 70 rows containing missing values (geom_point).

### Stool

stool |> select(SampleID, Disease_Status, Faith_pd, Dysbiosis_Index) |> 
  ggplot(aes(x = Disease_Status, y = Faith_pd)) + 
  geom_boxplot() +
  geom_jitter(color = "grey", alpha = .7) +
  geom_signif(comparisons = mycompr, step_increase = .3) +
  theme_classic()
## Warning: Removed 74 rows containing non-finite values (stat_boxplot).
## Warning: Removed 74 rows containing non-finite values (stat_signif).
## Warning in wilcox.test.default(c(8.992, 11.905, 16.197, 9.401, 6.137, 9.192, :
## cannot compute exact p-value with ties
## Warning: Removed 74 rows containing missing values (geom_point).

Serum

serum |> select(SampleID, Disease_Status, Faith_pd, Dysbiosis_Index) |> 
  ggplot(aes(x = Disease_Status, y = Dysbiosis_Index)) + 
  geom_boxplot() +
  geom_jitter(color = "grey", alpha = .7) +
  geom_signif(comparisons = mycompr, step_increase = .1) +
  theme_classic()
## Warning: Removed 68 rows containing non-finite values (stat_boxplot).
## Warning: Removed 68 rows containing non-finite values (stat_signif).
## Warning: Removed 68 rows containing missing values (geom_point).

Stool

stool |> select(SampleID, Disease_Status, Faith_pd, Dysbiosis_Index) |> 
  ggplot(aes(x = Disease_Status, y = Dysbiosis_Index)) + 
  geom_boxplot() +
  geom_jitter(color = "grey", alpha = .7) +
  geom_signif(comparisons = mycompr, step_increase = .1) +
  theme_classic()
## Warning: Removed 69 rows containing non-finite values (stat_boxplot).
## Warning: Removed 69 rows containing non-finite values (stat_signif).
## Warning: Removed 69 rows containing missing values (geom_point).

מצורף קובץ המטהדטה של הסרומים ושל הצואות.

אני לא יודעת אם צריך לכל מטהדטה בנפרד או

שאפשר לאחד ביניהם ולבדוק את כולם ביחד, אז החלטה שלך…

אם אתה מצליח לעשות את זה עד שני

(כדי שאולי יהיה סיכוי שאני לא אצא גרועה ממש ואבקש מיעל לדחות) זה יהיה מעולה!

תודה רבה רבה רבה!!!