library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
serum <- "./Serum_metadata.txt" |> read_tsv() |> mutate(source = "serum") |>
mutate( Dysbiosis_Index = Dysbiosis_Index |> as.numeric()
, FCP_Absolute = FCP_Absolute |> as.numeric())
## Rows: 206 Columns: 43
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (34): SampleID, Cohort, pn_ID, Gender, Smoking, DOB, Date, Fasting, Dx, ...
## dbl (9): Age, Visit, Days_since_first_serum, Months_since_first_serum, CRP_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
stool <- "./Stool_metadata.txt" |> read_tsv() |> mutate(source = "stool") |>
mutate(Dysbiosis_Index = Dysbiosis_Index |> as.numeric())
## Rows: 312 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (27): SampleID, Cohort, pn_ID, Gender, DOB, Smoking, Date, Visit_origina...
## dbl (9): Age, Visit, CRP_Absolute, FCP_Absolute, Total_LS, Highest_LS, CDAI...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
input <-
"Faith + dysbiosis index
Faith + FCP_Absolute
Faith + CRP_Absolute
Dysbiosis index + FCP absolute
Dysbiosis index + CRP absolute
FCP absolute + CRP absolute"
# AAAAAAAAHHHHHHHH!
input <- input |> str_replace_all("dysbiosis index", "Dysbiosis_Index")
input <- input |> str_replace_all("Dysbiosis index", "Dysbiosis_Index")
input <- input |> str_replace_all("Faith", "Faith_pd")
input <- input |> str_replace_all("FCP a", "FCP_A")
input <- input |> str_replace_all("CRP a", "CRP_A")
couples <- read.table(sep = '+', text = input)
couples$V1 <- couples$V1 |> trimws()
couples$V2 <- couples$V2 |> trimws()
לכל זוג צריך לבדוק את קורלציית ספירמן
ואז הצבעים בהיטמאפ יהיה ה-R,
ואם זה מובהק אז צריך להוסיף את ה- PVAL לריבוע.
couples$cor <- NA
couples$pval <- NA
for(row in couples |> nrow() |> seq()){ # Shame on me
v1 <- couples[row,1] ; v2 <- couples[row,2]
# 1
couples[row, "cor"] <- serum |>
select(SampleID, Faith_pd, FCP_Absolute, CRP_Absolute, Dysbiosis_Index) |> drop_na() |>
summarise(cor(!!sym(v1), !!sym(v2), method = "spearman"))
# 3
couples[row, "pval"] <- serum |>
select(SampleID, Faith_pd, FCP_Absolute, CRP_Absolute, Dysbiosis_Index) |> drop_na() |>
summarise(cor.test(x = !!sym(v1),y = !!sym(v2), method = "spearman")$p.value)
}
## Warning in cor.test.default(x = Faith_pd, y = FCP_Absolute, method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(x = Faith_pd, y = CRP_Absolute, method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(x = Dysbiosis_Index, y = FCP_Absolute, method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(x = Dysbiosis_Index, y = CRP_Absolute, method =
## "spearman"): Cannot compute exact p-value with ties
## Warning in cor.test.default(x = FCP_Absolute, y = CRP_Absolute, method =
## "spearman"): Cannot compute exact p-value with ties
couples |>
ggplot(aes(x = V1, y = V2, fill = cor)) + # 2
geom_tile() +
geom_text(aes(label = round(pval, 4))) +
theme_classic() + scale_fill_gradient2(low = "red", high = "blue", mid = "white") +
theme(axis.title = element_blank()) + ggtitle("Corplot of some measurments (serum metadata)")
# vals <- serum |>
vals <- stool |>
select(Faith_pd, FCP_Absolute, CRP_Absolute, Dysbiosis_Index) |>
drop_na()
m <- cor(vals, method = "spearman")
peas <- corrplot::cor.mtest(vals)
corrplot::corrplot(m ,p.mat = peas$p
, type = "lower"
# , sig.level = 0.05
# , sig.level = 1
, order = 'hclust'
# , insig = 'label_sig', sig.level = c(.000001,.0001, 0.001, 0.01, 0.05)
, addrect = 2
, number.cex = .5
, addCoef.col ='black'
, tl.col = 'black'
# , tl.pos = 'd'
, diag=F)
vals |> nrow()
## [1] 196
m
## Faith_pd FCP_Absolute CRP_Absolute Dysbiosis_Index
## Faith_pd 1.0000000 -0.1666612 -0.1101604 -0.2411797
## FCP_Absolute -0.1666612 1.0000000 0.1601418 0.2242859
## CRP_Absolute -0.1101604 0.1601418 1.0000000 0.1323307
## Dysbiosis_Index -0.2411797 0.2242859 0.1323307 1.0000000
peas$p
## Faith_pd FCP_Absolute CRP_Absolute Dysbiosis_Index
## Faith_pd 0.00000e+00 1.158700e-03 2.054160e-02 1.669500e-09
## FCP_Absolute 1.15870e-03 0.000000e+00 2.949338e-06 2.062564e-02
## CRP_Absolute 2.05416e-02 2.949338e-06 0.000000e+00 6.897010e-04
## Dysbiosis_Index 1.66950e-09 2.062564e-02 6.897010e-04 0.000000e+00
# vals <- serum |>
vals <- stool |>
filter(Dx == "CD") |>
select(Faith_pd, FCP_Absolute, CRP_Absolute, Dysbiosis_Index) |>
drop_na()
m <- cor(vals, method = "spearman")
peas <- corrplot::cor.mtest(vals)
corrplot::corrplot(m ,p.mat = peas$p
, type = "lower"
# , sig.level = 0.05
# , sig.level = 1
, order = 'hclust'
# , insig = 'label_sig', sig.level = c(.000001,.0001, 0.001, 0.01, 0.05)
, addrect = 2
, number.cex = .5
, addCoef.col ='black'
, tl.col = 'black'
# , tl.pos = 'd'
, diag=F)
vals |> nrow()
## [1] 196
m
## Faith_pd FCP_Absolute CRP_Absolute Dysbiosis_Index
## Faith_pd 1.0000000 -0.1666612 -0.1101604 -0.2411797
## FCP_Absolute -0.1666612 1.0000000 0.1601418 0.2242859
## CRP_Absolute -0.1101604 0.1601418 1.0000000 0.1323307
## Dysbiosis_Index -0.2411797 0.2242859 0.1323307 1.0000000
peas$p
## Faith_pd FCP_Absolute CRP_Absolute Dysbiosis_Index
## Faith_pd 0.00000e+00 1.158700e-03 2.054160e-02 1.669500e-09
## FCP_Absolute 1.15870e-03 0.000000e+00 2.949338e-06 2.062564e-02
## CRP_Absolute 2.05416e-02 2.949338e-06 0.000000e+00 6.897010e-04
## Dysbiosis_Index 1.66950e-09 2.062564e-02 6.897010e-04 0.000000e+00
* P<0.05
** P< 0.01
*** P<0.001
**** P<0.0001
***** P<0.00001
בנוסף, אם יש לך כוח,
צריך גם לעשות בר-פלוט שמשווה בין
כאשר ציר X הוא ה- disease status
וציר y הוא faith פעם אחת,
ופעם נוספת הוא dysibiosis index.
כמובן שצריך גם כאן לעשות מבחן סטטיסטי…
mycompr <- list(c("CD_active", "Control"),
c("CD_Remission", "CD_active"),
c("CD_Remission", "Control"))
library(ggsignif)
serum |> select(SampleID, Disease_Status, Faith_pd, Dysbiosis_Index) |>
ggplot(aes(x = Disease_Status, y = Faith_pd)) +
geom_boxplot() +
geom_jitter(color = "grey", alpha = .7) +
geom_signif(comparisons = mycompr, step_increase = .3) +
theme_classic()
## Warning: Removed 70 rows containing non-finite values (stat_boxplot).
## Warning: Removed 70 rows containing non-finite values (stat_signif).
## Warning: Removed 70 rows containing missing values (geom_point).
### Stool
stool |> select(SampleID, Disease_Status, Faith_pd, Dysbiosis_Index) |>
ggplot(aes(x = Disease_Status, y = Faith_pd)) +
geom_boxplot() +
geom_jitter(color = "grey", alpha = .7) +
geom_signif(comparisons = mycompr, step_increase = .3) +
theme_classic()
## Warning: Removed 74 rows containing non-finite values (stat_boxplot).
## Warning: Removed 74 rows containing non-finite values (stat_signif).
## Warning in wilcox.test.default(c(8.992, 11.905, 16.197, 9.401, 6.137, 9.192, :
## cannot compute exact p-value with ties
## Warning: Removed 74 rows containing missing values (geom_point).
serum |> select(SampleID, Disease_Status, Faith_pd, Dysbiosis_Index) |>
ggplot(aes(x = Disease_Status, y = Dysbiosis_Index)) +
geom_boxplot() +
geom_jitter(color = "grey", alpha = .7) +
geom_signif(comparisons = mycompr, step_increase = .1) +
theme_classic()
## Warning: Removed 68 rows containing non-finite values (stat_boxplot).
## Warning: Removed 68 rows containing non-finite values (stat_signif).
## Warning: Removed 68 rows containing missing values (geom_point).
stool |> select(SampleID, Disease_Status, Faith_pd, Dysbiosis_Index) |>
ggplot(aes(x = Disease_Status, y = Dysbiosis_Index)) +
geom_boxplot() +
geom_jitter(color = "grey", alpha = .7) +
geom_signif(comparisons = mycompr, step_increase = .1) +
theme_classic()
## Warning: Removed 69 rows containing non-finite values (stat_boxplot).
## Warning: Removed 69 rows containing non-finite values (stat_signif).
## Warning: Removed 69 rows containing missing values (geom_point).
מצורף קובץ המטהדטה של הסרומים ושל הצואות.
אני לא יודעת אם צריך לכל מטהדטה בנפרד או
שאפשר לאחד ביניהם ולבדוק את כולם ביחד, אז החלטה שלך…
אם אתה מצליח לעשות את זה עד שני
(כדי שאולי יהיה סיכוי שאני לא אצא גרועה ממש ואבקש מיעל לדחות) זה יהיה מעולה!
תודה רבה רבה רבה!!!