library(readr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
# Load the homogenate pulldown table from the local Downloads folder.
Pull_Homogenate_Neg_Pos <- read_csv(file = "~/Downloads/Pull.Homogenate.Neg.Pos.csv")
## Rows: 1741 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Proteins
## dbl (12): X.124HomogenateIP, X.135HomogenateIP, X.119HomogenateIP, X.121Homo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# ---- Panel I (homogenate): reshape and per-protein t-tests ----
# Produces `df_long` (one row per protein x sample, labelled with its pulldown
# group), `pvals` (one t-test per protein) and `star_labels` (significance
# label plus the y position at which the plot draws it).
df <- Pull_Homogenate_Neg_Pos

# Sample columns belonging to each pulldown group.
negative.samples <- c(
  "X.124HomogenateIP", "X.135HomogenateIP",
  "X.119HomogenateIP", "X.121HomogenateIP"
)
positive.samples <- c(
  "X.106HomogenateIP", "X.120HomogenateIP", "X.117HomogenateIP",
  "X.123HomogenateIP", "X.137HomogenateIP", "X.131HomogenateIP",
  "X.143HomogenateIP", "X.133HomogenateIP"
)

# Proteins shown in Panel I; this order is reused as the x-axis order.
panel_I_proteins <- c("PRUN1", "COX5B", "RAB10", "NDUFA2", "SEPT2", "PSAT1", "PALM2")

# `%in%` drops unmatched names without any message, so a misspelt protein or
# sample column would silently disappear from the figure. Report them instead.
local({
  absent_proteins <- setdiff(panel_I_proteins, df$Proteins)
  if (length(absent_proteins) > 0) {
    warning(
      "Panel I proteins not found in the data: ",
      paste(absent_proteins, collapse = ", "),
      call. = FALSE
    )
  }
  absent_samples <- setdiff(c(negative.samples, positive.samples), names(df))
  if (length(absent_samples) > 0) {
    warning(
      "Sample columns not found in the data: ",
      paste(absent_samples, collapse = ", "),
      call. = FALSE
    )
  }
  invisible(NULL)
})

# Long format: one row per protein x sample. Columns that belong to neither
# sample list get an NA group and are removed.
df_long <- df %>%
  filter(Proteins %in% panel_I_proteins) %>%
  pivot_longer(-Proteins, names_to = "Sample", values_to = "Abundance") %>%
  mutate(
    Group = case_when(
      Sample %in% negative.samples ~ "Negative Pulldown",
      Sample %in% positive.samples ~ "Camk2a Pulldown",
      TRUE ~ NA_character_
    ),
    # NOTE(review): if Abundance is already on a log2 scale (values around 20
    # would suggest so) this transforms twice -- confirm against the export.
    Log2Abundance = log2(Abundance)
  ) %>%
  filter(!is.na(Group))

# Fix the protein order so the x-axis follows `panel_I_proteins`.
df_long$Proteins <- factor(df_long$Proteins, levels = panel_I_proteins)

# One t-test per protein comparing the two groups. `t.test()` defaults to the
# unequal-variance (Welch) form; p-values are not adjusted for multiple tests.
pvals <- df_long %>%
  group_by(Proteins) %>%
  summarise(p_value = t.test(Log2Abundance ~ Group)$p.value, .groups = "drop") %>%
  mutate(
    Significance = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01 ~ "**",
      p_value < 0.05 ~ "*",
      TRUE ~ "ns"
    )
  )

# Place each label 0.2 above the highest point of its protein. Every row of
# `df_long` already has a non-missing protein (it passed the `%in%` filter and
# the factor levels are that same vector), so no NA filter is needed here.
star_labels <- df_long %>%
  group_by(Proteins) %>%
  summarise(y_pos = max(Log2Abundance, na.rm = TRUE) + 0.2, .groups = "drop") %>%
  left_join(pvals, by = "Proteins")
# Plot: individual samples per protein, separated by group, with the group
# mean, mean +/- SE error bars and the significance label above each protein.
# NOTE(review): y is Log2Abundance while the axis title says fold enrichment --
# confirm the wording.
ggplot(df_long, aes(x = Proteins, y = Log2Abundance, color = Group, shape = Group)) +
  # position_dodge() only separates the two groups; no random jitter is
  # applied, so geom_point() states what is actually drawn.
  geom_point(position = position_dodge(width = 0.6), size = 3) +
  # Group mean drawn in black with shape 95 (the "_" glyph).
  stat_summary(
    fun = mean, geom = "point", shape = 95, size = 5,
    position = position_dodge(width = 0.6), color = "black"
  ) +
  stat_summary(
    fun.data = mean_se, geom = "errorbar", width = 0.2,
    position = position_dodge(width = 0.6), color = "black"
  ) +
  geom_text(
    data = star_labels, aes(x = Proteins, y = y_pos, label = Significance),
    inherit.aes = FALSE, size = 5
  ) +
  scale_shape_manual(values = c("Negative Pulldown" = 1, "Camk2a Pulldown" = 0)) + # open circle/square
  scale_color_manual(values = c("Negative Pulldown" = "blue", "Camk2a Pulldown" = "red")) +
  # "\u2082" is the subscript-two character; the literal had been corrupted
  # by an encoding round trip.
  labs(x = NULL, y = "Log\u2082 Fold enrichment", title = "Panel I: Homogenate") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
library(readr)
# Load the P2-fraction pulldown table from the local Downloads folder.
Pulldown_Negative_Positive_all_samples_june15_2025 <- read_csv(
  file = "~/Downloads/Pulldown.Negative.Positive.all.samples.june15.2025.csv"
)
## Rows: 1741 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Proteins
## dbl (12): X.124P2fractionIP, X.135P2fractionIP, X.119P2fractionIP, X.121P2fr...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Switch the working table to the P2-fraction data and list its column headers.
df <- Pulldown_Negative_Positive_all_samples_june15_2025
names(df)
## [1] "Proteins" "X.124P2fractionIP" "X.135P2fractionIP"
## [4] "X.119P2fractionIP" "X.121P2fractionIP" "X.106P2fractionIP"
## [7] "X.120P2fractionIP" "X.117P2fractionIP" "X.123P2fractionIP"
## [10] "X.137P2fractionIP" "X.131P2fractionIP" "X.143P2fractionIP"
## [13] "X.133P2fractionIP"
library(readr)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(ggpubr)
# ---- Panel J (P2 fraction): reshape and per-protein t-tests ----
# Produces `df_long` (one row per protein x sample, labelled with its pulldown
# group), `pvals` (one t-test per protein) and `star_labels` (significance
# label plus the y position at which the plot draws it).
df <- Pulldown_Negative_Positive_all_samples_june15_2025

# Sample columns belonging to each pulldown group (taken from the headers).
negative.samples <- c(
  "X.124P2fractionIP", "X.135P2fractionIP",
  "X.119P2fractionIP", "X.121P2fractionIP"
)
positive.samples <- c(
  "X.106P2fractionIP", "X.120P2fractionIP", "X.117P2fractionIP",
  "X.123P2fractionIP", "X.137P2fractionIP", "X.131P2fractionIP",
  "X.143P2fractionIP", "X.133P2fractionIP"
)

# Proteins shown in Panel J; this order is reused as the x-axis order.
panel_J_proteins <- c("PRKAA", "RPL28", "SHANK2", "ARG5", "TOM20", "MARK2", "VDAC1")

# `%in%` drops unmatched names without any message, so a misspelt protein or
# sample column would silently disappear from the figure. Report them instead.
local({
  absent_proteins <- setdiff(panel_J_proteins, df$Proteins)
  if (length(absent_proteins) > 0) {
    warning(
      "Panel J proteins not found in the data: ",
      paste(absent_proteins, collapse = ", "),
      call. = FALSE
    )
  }
  absent_samples <- setdiff(c(negative.samples, positive.samples), names(df))
  if (length(absent_samples) > 0) {
    warning(
      "Sample columns not found in the data: ",
      paste(absent_samples, collapse = ", "),
      call. = FALSE
    )
  }
  invisible(NULL)
})

# Long format: one row per protein x sample. Columns that belong to neither
# sample list get an NA group and are removed.
df_long <- df %>%
  filter(Proteins %in% panel_J_proteins) %>%
  pivot_longer(-Proteins, names_to = "Sample", values_to = "Abundance") %>%
  mutate(
    Group = case_when(
      Sample %in% negative.samples ~ "Negative Pulldown",
      Sample %in% positive.samples ~ "Camk2a Pulldown",
      TRUE ~ NA_character_
    ),
    # NOTE(review): if Abundance is already on a log2 scale (values around 20
    # would suggest so) this transforms twice -- confirm against the export.
    Log2Abundance = log2(Abundance)
  ) %>%
  filter(!is.na(Group))

# Fix the protein order so the x-axis follows `panel_J_proteins`.
df_long$Proteins <- factor(df_long$Proteins, levels = panel_J_proteins)

# One t-test per protein comparing the two groups. `t.test()` defaults to the
# unequal-variance (Welch) form; p-values are not adjusted for multiple tests.
pvals <- df_long %>%
  group_by(Proteins) %>%
  summarise(p_value = t.test(Log2Abundance ~ Group)$p.value, .groups = "drop") %>%
  mutate(
    Significance = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01 ~ "**",
      p_value < 0.05 ~ "*",
      TRUE ~ "ns"
    )
  )

# Place each label 0.4 above the highest point of its protein. Every row of
# `df_long` already has a non-missing protein (it passed the `%in%` filter and
# the factor levels are that same vector), so no NA filter is needed here.
star_labels <- df_long %>%
  group_by(Proteins) %>%
  summarise(y_pos = max(Log2Abundance, na.rm = TRUE) + 0.4, .groups = "drop") %>%
  left_join(pvals, by = "Proteins")
# Plot: individual samples per protein, separated by group, with the group
# mean, mean +/- SE error bars and the significance label above each protein.
# NOTE(review): y is Log2Abundance while the axis title says fold enrichment --
# confirm the wording.
ggplot(df_long, aes(x = Proteins, y = Log2Abundance, color = Group, shape = Group)) +
  # position_dodge() only separates the two groups; no random jitter is
  # applied, so geom_point() states what is actually drawn.
  geom_point(position = position_dodge(width = 0.6), size = 2.5, alpha = 0.9) +
  # Group mean as a white-filled diamond (shape 23) with a black outline.
  stat_summary(
    fun = mean, geom = "point", shape = 23, size = 3.5, fill = "white",
    position = position_dodge(width = 0.6), color = "black"
  ) +
  stat_summary(
    fun.data = mean_se, geom = "errorbar", width = 0.2,
    position = position_dodge(width = 0.6), color = "black"
  ) +
  geom_text(
    data = star_labels, aes(x = Proteins, y = y_pos, label = Significance),
    inherit.aes = FALSE, size = 5
  ) +
  scale_shape_manual(values = c("Negative Pulldown" = 1, "Camk2a Pulldown" = 0)) +
  scale_color_manual(values = c("Negative Pulldown" = "blue", "Camk2a Pulldown" = "red")) +
  # "\u2082" is the subscript-two character; the literal had been corrupted
  # by an encoding round trip.
  labs(x = NULL, y = "Log\u2082 Fold enrichment", title = "Panel J: P2 Fraction Pulldown") +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank(),
    legend.position = "top"
  )
# Reload the homogenate pulldown table for the extended Panel I protein list.
Pull_Homogenate_Neg_Pos <- read_csv(file = "~/Downloads/Pull.Homogenate.Neg.Pos.csv")
## Rows: 1741 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Proteins
## dbl (12): X.124HomogenateIP, X.135HomogenateIP, X.119HomogenateIP, X.121Homo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# ---- Panel I (homogenate, extended list): reshape and per-protein t-tests ----
# Produces `df_long` (one row per protein x sample, labelled with its pulldown
# group), `pvals` (one t-test per protein) and `star_labels` (significance
# label plus the y position at which the plot draws it).
df <- Pull_Homogenate_Neg_Pos

# Sample columns belonging to each pulldown group.
negative.samples <- c(
  "X.124HomogenateIP", "X.135HomogenateIP",
  "X.119HomogenateIP", "X.121HomogenateIP"
)
positive.samples <- c(
  "X.106HomogenateIP", "X.120HomogenateIP", "X.117HomogenateIP",
  "X.123HomogenateIP", "X.137HomogenateIP", "X.131HomogenateIP",
  "X.143HomogenateIP", "X.133HomogenateIP"
)

# Proteins shown in Panel I; this order is reused as the x-axis order.
# The trailing marks are the author's annotations. NOTE(review): they do not
# all match the significance computed in the recorded run (e.g. STMN1, GRIA2,
# SHAN2, GRIN1), and that run reports only 20 of these 23 proteins (NDUFA2,
# SEPT2 and PSAT1 are absent) -- confirm which source the marks refer to.
panel_I_proteins <- c(
  "PRUN1", # ***
  "STMN1", # ***
  "DLGP4", # ***
  "NCAM1", # ***
  "STX1B", # ***
  "HOME1", # ***
  "SV2A", # ***
  "NDUFA2", # ***
  "SHRM2", # ***
  "EFL1", # **
  "FKB1A", # **
  "CLD11", # **
  "GRIA2", # **
  "COX5B", # *
  "RAB10", # *
  "PALM2", # **
  "NDUAB", # *
  "SHAN2", # *
  "DGKZ", # *
  "SEPT2_MOUSE", # Not clearly marked but included
  "GRIN1", # Not marked
  "SEPT2", # Not shown (possible duplicate of SEPT2_MOUSE?)
  "PSAT1" # Not shown
)

# `%in%` drops unmatched names without any message, so a misspelt protein or
# sample column would silently disappear from the figure. Report them instead.
local({
  absent_proteins <- setdiff(panel_I_proteins, df$Proteins)
  if (length(absent_proteins) > 0) {
    warning(
      "Panel I proteins not found in the data: ",
      paste(absent_proteins, collapse = ", "),
      call. = FALSE
    )
  }
  absent_samples <- setdiff(c(negative.samples, positive.samples), names(df))
  if (length(absent_samples) > 0) {
    warning(
      "Sample columns not found in the data: ",
      paste(absent_samples, collapse = ", "),
      call. = FALSE
    )
  }
  invisible(NULL)
})

# Long format: one row per protein x sample. Columns that belong to neither
# sample list get an NA group and are removed.
df_long <- df %>%
  filter(Proteins %in% panel_I_proteins) %>%
  pivot_longer(-Proteins, names_to = "Sample", values_to = "Abundance") %>%
  mutate(
    Group = case_when(
      Sample %in% negative.samples ~ "Negative Pulldown",
      Sample %in% positive.samples ~ "Camk2a Pulldown",
      TRUE ~ NA_character_
    ),
    # NOTE(review): the recorded Abundance values are around 20, which looks
    # like data already on a log2 scale; if so this transforms twice -- confirm.
    Log2Abundance = log2(Abundance)
  ) %>%
  filter(!is.na(Group))

# Fix the protein order so the x-axis follows `panel_I_proteins`.
df_long$Proteins <- factor(df_long$Proteins, levels = panel_I_proteins)

# One t-test per protein comparing the two groups. `t.test()` defaults to the
# unequal-variance (Welch) form; p-values are not adjusted for multiple tests.
pvals <- df_long %>%
  group_by(Proteins) %>%
  summarise(p_value = t.test(Log2Abundance ~ Group)$p.value, .groups = "drop") %>%
  mutate(
    Significance = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01 ~ "**",
      p_value < 0.05 ~ "*",
      TRUE ~ "ns"
    )
  )

# Place each label 0.2 above the highest point of its protein. Every row of
# `df_long` already has a non-missing protein (it passed the `%in%` filter and
# the factor levels are that same vector), so no NA filter is needed here.
star_labels <- df_long %>%
  group_by(Proteins) %>%
  summarise(y_pos = max(Log2Abundance, na.rm = TRUE) + 0.2, .groups = "drop") %>%
  left_join(pvals, by = "Proteins")
star_labels
## # A tibble: 20 × 4
## Proteins y_pos p_value Significance
## <fct> <dbl> <dbl> <chr>
## 1 PRUN1 4.94 0.000916 ***
## 2 STMN1 4.95 0.00247 **
## 3 DLGP4 4.82 0.0000113 ***
## 4 NCAM1 4.97 0.0000775 ***
## 5 STX1B 4.98 0.00169 **
## 6 HOME1 4.82 0.000405 ***
## 7 SV2A 4.87 0.00145 **
## 8 SHRM2 4.72 0.000112 ***
## 9 EFL1 4.87 0.00477 **
## 10 FKB1A 4.89 0.00160 **
## 11 CLD11 4.80 0.00401 **
## 12 GRIA2 4.88 0.0000138 ***
## 13 COX5B 4.89 0.0205 *
## 14 RAB10 4.85 0.0338 *
## 15 PALM2 4.69 0.00454 **
## 16 NDUAB 4.78 0.0356 *
## 17 SHAN2 4.94 0.00000448 ***
## 18 DGKZ 4.71 0.0292 *
## 19 SEPT2_MOUSE 4.71 0.000114 ***
## 20 GRIN1 4.93 0.00000000300 ***
# Print the long-format table to inspect the group labels and log2 values.
df_long
## # A tibble: 240 × 5
## Proteins Sample Abundance Group Log2Abundance
## <fct> <chr> <dbl> <chr> <dbl>
## 1 SHRM2 X.124HomogenateIP 20.0 Negative Pulldown 4.32
## 2 SHRM2 X.135HomogenateIP 19.4 Negative Pulldown 4.28
## 3 SHRM2 X.119HomogenateIP 19.7 Negative Pulldown 4.30
## 4 SHRM2 X.121HomogenateIP 18.8 Negative Pulldown 4.23
## 5 SHRM2 X.106HomogenateIP 21.5 Camk2a Pulldown 4.42
## 6 SHRM2 X.120HomogenateIP 21.6 Camk2a Pulldown 4.43
## 7 SHRM2 X.117HomogenateIP 22.7 Camk2a Pulldown 4.50
## 8 SHRM2 X.123HomogenateIP 22.5 Camk2a Pulldown 4.49
## 9 SHRM2 X.137HomogenateIP 20.3 Camk2a Pulldown 4.34
## 10 SHRM2 X.131HomogenateIP 22.5 Camk2a Pulldown 4.49
## # ℹ 230 more rows
# Plot: individual samples per protein, separated by group, with the group
# mean, mean +/- SE error bars and the significance label above each protein.
# NOTE(review): y is Log2Abundance while the axis title says fold enrichment --
# confirm the wording.
ggplot(df_long, aes(x = Proteins, y = Log2Abundance, color = Group, shape = Group)) +
  # position_dodge() only separates the two groups; no random jitter is
  # applied, so geom_point() states what is actually drawn.
  geom_point(position = position_dodge(width = 0.6), size = 3) +
  # Group mean drawn in black with shape 95 (the "_" glyph).
  stat_summary(
    fun = mean, geom = "point", shape = 95, size = 5,
    position = position_dodge(width = 0.6), color = "black"
  ) +
  stat_summary(
    fun.data = mean_se, geom = "errorbar", width = 0.2,
    position = position_dodge(width = 0.6), color = "black"
  ) +
  geom_text(
    data = star_labels, aes(x = Proteins, y = y_pos, label = Significance),
    inherit.aes = FALSE, size = 5
  ) +
  scale_shape_manual(values = c("Negative Pulldown" = 1, "Camk2a Pulldown" = 0)) + # open circle/square
  scale_color_manual(values = c("Negative Pulldown" = "blue", "Camk2a Pulldown" = "red")) +
  # "\u2082" is the subscript-two character; the literal had been corrupted
  # by an encoding round trip.
  labs(x = NULL, y = "Log\u2082 Fold enrichment", title = "Panel I: Homogenate") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Same Panel I figure with a black frame drawn around the plotting area.
# NOTE(review): y is Log2Abundance while the axis title says fold enrichment --
# confirm the wording.
ggplot(df_long, aes(x = Proteins, y = Log2Abundance, color = Group, shape = Group)) +
  # position_dodge() only separates the two groups; no random jitter is
  # applied, so geom_point() states what is actually drawn.
  geom_point(position = position_dodge(width = 0.6), size = 3) +
  # Group mean drawn in black with shape 95 (the "_" glyph).
  stat_summary(
    fun = mean, geom = "point", shape = 95, size = 5,
    position = position_dodge(width = 0.6), color = "black"
  ) +
  stat_summary(
    fun.data = mean_se, geom = "errorbar", width = 0.2,
    position = position_dodge(width = 0.6), color = "black"
  ) +
  geom_text(
    data = star_labels, aes(x = Proteins, y = y_pos, label = Significance),
    inherit.aes = FALSE, size = 5
  ) +
  scale_shape_manual(values = c("Negative Pulldown" = 1, "Camk2a Pulldown" = 0)) +
  scale_color_manual(values = c("Negative Pulldown" = "blue", "Camk2a Pulldown" = "red")) +
  # "\u2082" is the subscript-two character; the literal had been corrupted
  # by an encoding round trip.
  labs(x = NULL, y = "Log\u2082 Fold enrichment", title = "Panel I: Homogenate") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1)
  )
library(readr)
# Reload the P2-fraction pulldown table for the extended Panel J protein list.
Pulldown_Negative_Positive_all_samples_june15_2025 <- read_csv(
  file = "~/Downloads/Pulldown.Negative.Positive.all.samples.june15.2025.csv"
)
## Rows: 1741 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Proteins
## dbl (12): X.124P2fractionIP, X.135P2fractionIP, X.119P2fractionIP, X.121P2fr...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Switch the working table to the P2-fraction data and list its column headers.
df <- Pulldown_Negative_Positive_all_samples_june15_2025
names(df)
## [1] "Proteins" "X.124P2fractionIP" "X.135P2fractionIP"
## [4] "X.119P2fractionIP" "X.121P2fractionIP" "X.106P2fractionIP"
## [7] "X.120P2fractionIP" "X.117P2fractionIP" "X.123P2fractionIP"
## [10] "X.137P2fractionIP" "X.131P2fractionIP" "X.143P2fractionIP"
## [13] "X.133P2fractionIP"
library(readr)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(ggpubr)
# ---- Panel J (P2 fraction, extended list): long data and per-protein tests ----
df <- Pulldown_Negative_Positive_all_samples_june15_2025

# Column headers of the negative and Camk2a pulldown samples, built from the
# numeric sample IDs.
negative.samples <- paste0("X.", c(124, 135, 119, 121), "P2fractionIP")
positive.samples <- paste0(
  "X.", c(106, 120, 117, 123, 137, 131, 143, 133), "P2fractionIP"
)

# Proteins drawn in Panel J, in x-axis order.
panel_J_proteins <- c(
  "TOM20", "MARK2", "MTAP2", "PTN11", "RAB6B",
  "PGK1", "SPTB2", "SGT1", "EF1A2", "DLGP2",
  "NDUA4", "DYN1", "ATP5I", "AK1A1", "S4R225",
  "THEM4", "NDKA", "RB39B", "DDX5"
)

# One row per protein x sample; rows whose sample is in neither list are dropped.
df_long <- df |>
  filter(Proteins %in% panel_J_proteins) |>
  pivot_longer(cols = -Proteins, names_to = "Sample", values_to = "Abundance") |>
  mutate(
    Group = case_when(
      Sample %in% negative.samples ~ "Negative Pulldown",
      Sample %in% positive.samples ~ "Camk2a Pulldown",
      TRUE ~ NA_character_
    ),
    Log2Abundance = log2(Abundance)
  ) |>
  drop_na(Group) |>
  mutate(Proteins = factor(Proteins, levels = panel_J_proteins))

# Per-protein t-test p-value between the two groups ...
pvals <- df_long |>
  group_by(Proteins) |>
  summarise(p_value = t.test(Log2Abundance ~ Group)$p.value, .groups = "drop")
# ... and the star label derived from it.
pvals$Significance <- case_when(
  pvals$p_value < 0.001 ~ "***",
  pvals$p_value < 0.01 ~ "**",
  pvals$p_value < 0.05 ~ "*",
  TRUE ~ "ns"
)

# Label height: 0.4 above the largest value of each protein.
star_labels <- df_long |>
  group_by(Proteins) |>
  summarise(y_pos = max(Log2Abundance, na.rm = TRUE) + 0.4, .groups = "drop") |>
  left_join(pvals, by = "Proteins") |>
  filter(!is.na(Proteins))
star_labels
## # A tibble: 19 × 4
## Proteins y_pos p_value Significance
## <fct> <dbl> <dbl> <chr>
## 1 TOM20 4.97 0.000210 ***
## 2 MARK2 4.98 0.0170 *
## 3 MTAP2 5.25 0.00844 **
## 4 PTN11 4.95 0.0157 *
## 5 RAB6B 5.20 0.00550 **
## 6 PGK1 5.08 0.00212 **
## 7 SPTB2 5.20 0.00152 **
## 8 SGT1 4.86 0.00499 **
## 9 EF1A2 5.20 0.00622 **
## 10 DLGP2 4.95 0.00297 **
## 11 NDUA4 5.17 0.0145 *
## 12 DYN1 5.02 0.00123 **
## 13 ATP5I 4.83 0.0253 *
## 14 AK1A1 4.86 0.00843 **
## 15 S4R225 4.94 0.0000265 ***
## 16 THEM4 4.96 0.0471 *
## 17 NDKA 5.02 0.00415 **
## 18 RB39B 4.96 0.00198 **
## 19 DDX5 4.91 0.0304 *
# Print the long-format table to inspect the group labels and log2 values.
df_long
## # A tibble: 228 × 5
## Proteins Sample Abundance Group Log2Abundance
## <fct> <chr> <dbl> <chr> <dbl>
## 1 PGK1 X.124P2fractionIP 24.1 Negative Pulldown 4.59
## 2 PGK1 X.135P2fractionIP 24.2 Negative Pulldown 4.59
## 3 PGK1 X.119P2fractionIP 24.8 Negative Pulldown 4.63
## 4 PGK1 X.121P2fractionIP 24.3 Negative Pulldown 4.60
## 5 PGK1 X.106P2fractionIP 25.7 Camk2a Pulldown 4.68
## 6 PGK1 X.120P2fractionIP 25.6 Camk2a Pulldown 4.68
## 7 PGK1 X.117P2fractionIP 25.6 Camk2a Pulldown 4.68
## 8 PGK1 X.123P2fractionIP 25.7 Camk2a Pulldown 4.68
## 9 PGK1 X.137P2fractionIP 25.2 Camk2a Pulldown 4.65
## 10 PGK1 X.131P2fractionIP 24.8 Camk2a Pulldown 4.63
## # ℹ 218 more rows
# Plot: individual samples per protein, separated by group, with the group
# mean, mean +/- SE error bars and the significance label above each protein.
# NOTE(review): y is Log2Abundance while the axis title says fold enrichment --
# confirm the wording.
ggplot(df_long, aes(x = Proteins, y = Log2Abundance, color = Group, shape = Group)) +
  # position_dodge() only separates the two groups; no random jitter is
  # applied, so geom_point() states what is actually drawn.
  geom_point(position = position_dodge(width = 0.6), size = 2.5, alpha = 0.9) +
  # Group mean as a white-filled diamond (shape 23) with a black outline.
  stat_summary(
    fun = mean, geom = "point", shape = 23, size = 3.5, fill = "white",
    position = position_dodge(width = 0.6), color = "black"
  ) +
  stat_summary(
    fun.data = mean_se, geom = "errorbar", width = 0.2,
    position = position_dodge(width = 0.6), color = "black"
  ) +
  geom_text(
    data = star_labels, aes(x = Proteins, y = y_pos, label = Significance),
    inherit.aes = FALSE, size = 5
  ) +
  scale_shape_manual(values = c("Negative Pulldown" = 1, "Camk2a Pulldown" = 0)) +
  scale_color_manual(values = c("Negative Pulldown" = "blue", "Camk2a Pulldown" = "red")) +
  # "\u2082" is the subscript-two character; the literal had been corrupted
  # by an encoding round trip.
  labs(x = NULL, y = "Log\u2082 Fold enrichment", title = "Panel J: P2 Fraction Pulldown") +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank(),
    legend.position = "top"
  )
# Note:
# For each protein, a t-test compares the log2 abundances of the two pulldown groups.
# Significance labels are assigned from the p-value:
# *** for p < 0.001
# ** for p < 0.01
# * for p < 0.05
# ns for not significant (p >= 0.05)
# Bordered version of the P2-fraction figure. At this point `df_long` and
# `star_labels` hold the Panel J (P2 fraction) data, so the title names
# Panel J; the original "Panel I" was carried over from the homogenate plot.
# NOTE(review): y is Log2Abundance while the axis title says fold enrichment --
# confirm the wording.
ggplot(df_long, aes(x = Proteins, y = Log2Abundance, color = Group, shape = Group)) +
  # position_dodge() only separates the two groups; no random jitter is
  # applied, so geom_point() states what is actually drawn.
  geom_point(position = position_dodge(width = 0.6), size = 3) +
  # Group mean drawn in black with shape 95 (the "_" glyph).
  stat_summary(
    fun = mean, geom = "point", shape = 95, size = 5,
    position = position_dodge(width = 0.6), color = "black"
  ) +
  stat_summary(
    fun.data = mean_se, geom = "errorbar", width = 0.2,
    position = position_dodge(width = 0.6), color = "black"
  ) +
  geom_text(
    data = star_labels, aes(x = Proteins, y = y_pos, label = Significance),
    inherit.aes = FALSE, size = 5
  ) +
  scale_shape_manual(values = c("Negative Pulldown" = 1, "Camk2a Pulldown" = 0)) +
  scale_color_manual(values = c("Negative Pulldown" = "blue", "Camk2a Pulldown" = "red")) +
  # "\u2082" is the subscript-two character; the literal had been corrupted
  # by an encoding round trip.
  labs(x = NULL, y = "Log\u2082 Fold enrichment", title = "Panel J: P2fraction") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1)
  )