Samantha Sterner

Breast Cancer Cells Analysis

March 31, 2025

Load RMarkdown

library(rmarkdown)

Set chunk options (show code; hide messages and warnings)

# Global knitr chunk options: echo the code, hide messages and warnings,
# and collapse each chunk's source and output into one block.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  collapse = TRUE
)

Load Packages

# One library() call per line: several calls on the same line with no
# separator do not parse. The attach order is unchanged, because it decides
# which package wins when two of them export the same name.
library(reshape2)
library(ggplot2)
library(dplyr)
library(plotly)
library(viridis)
library(data.table)
library(pheatmap)
library(tidyverse)
library(ggthemes)
library(clipr)
library(tidyr)
library(Rcpp)

Load colours

# Colour palettes for the heatmaps and plots.
# String literals use straight quotes; typographic quotes are not valid R.
mycolors <- viridis(15)
felix_cols <- mycolors[c(5, 2)]
felix_4cols <- mycolors[c(15, 10, 8, 2)]
plain_cols1 <- c("blue", "gray")
plain_cols2 <- c("red", "gray")
# 21-step ramp running yellow -> white -> purple.
pats_cols <- colorRampPalette(c("#FDE725FF", "white", "#440154FF"))(21)
# 10-step ramp running white -> blue.
leos_cols <- colorRampPalette(c("white", "blue"))(10)

Load dataset

# Read the data set; the path is relative to the working directory.
breast_cancer_cells <- read_csv(file = "breast_cancer_cells.csv")

Make matrix of raw data

# Numeric matrix of the ten replicate columns, rounded to two decimals.
# `:` selects a range between two columns, so chaining it across every
# column name is not a valid column list; the columns are named explicitly.
breast_cancer_cells_mat1 <- breast_cancer_cells %>%
  select(
    MCF10A_1, MCF10A_2,
    MCF7_1, MCF7_2,
    MDA231_1, MDA231_2,
    MDA468_1, MDA468_2,
    SKBR3_1, SKBR3_2
  ) %>%
  as.matrix() %>%
  round(2)

# Preview the first rows of the matrix.
head(breast_cancer_cells_mat1)

Initial heatmap

# Heatmap of the raw intensities, one column per replicate. Rows and columns
# keep their input order (no clustering); values are scaled per column.
pheatmap(
  breast_cancer_cells_mat1,
  color = pats_cols,
  cellwidth = 30,
  cellheight = 0.03,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  legend = TRUE,
  fontsize = 7,
  scale = "column"
)

Make columns of mean intensity per cell line

# Average the two replicates of every cell line; MCF10A is the control.
breast_cancer_cells_data2 <- breast_cancer_cells %>%
  mutate(
    mean_control = (MCF10A_1 + MCF10A_2) / 2,
    mean_MCF7 = (MCF7_1 + MCF7_2) / 2,
    mean_MDA231 = (MDA231_1 + MDA231_2) / 2,
    mean_MDA468 = (MDA468_1 + MDA468_2) / 2,
    mean_SKBR3 = (SKBR3_1 + SKBR3_2) / 2
  )

Make columns of log2 ratios relative to the control

# log2 ratio of each cell line's mean intensity to the control mean.
# The input is breast_cancer_cells_data2 (the table that carries the mean_*
# columns); `breast_cancer_cells2` is never defined. The stray empty first
# argument to mutate() is removed as well.
breast_cancer_cells_data2 <- breast_cancer_cells_data2 %>%
  mutate(
    log_MCF7 = log2(mean_MCF7 / mean_control),
    log_MDA231 = log2(mean_MDA231 / mean_control),
    log_MDA468 = log2(mean_MDA468 / mean_control),
    log_SKBR3 = log2(mean_SKBR3 / mean_control)
  )

# List the column names to confirm the new columns were added.
names(breast_cancer_cells_data2)

Make a matrix of just log values

# Numeric matrix of the four log2-ratio columns, rounded to two decimals.
# The columns are named explicitly rather than chained with `:`, which
# selects a range between two columns and is not a column list.
breast_cancer_cells_mat2 <- breast_cancer_cells_data2 %>%
  select(log_MCF7, log_MDA231, log_MDA468, log_SKBR3) %>%
  as.matrix() %>%
  round(2)

Second heatmap

# Heatmap of the log2 ratios, one column per cell line. Rows and columns
# keep their input order (no clustering); values are scaled per column.
pheatmap(
  breast_cancer_cells_mat2,
  color = pats_cols,
  cellwidth = 30,
  cellheight = 0.03,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  legend = TRUE,
  fontsize = 7,
  scale = "column"
)

Volcano plot

# -log10 of the SKBR3-vs-control p-value, used as the volcano plot's y axis.
breast_cancer_cells_data2 <- breast_cancer_cells_data2 %>%
  mutate(neglog_SKBR3 = -log10(pvalue_SKBR3_vs_MCF10A))

# Basic volcano plot. `description` is not a ggplot2 aesthetic; it carries
# the gene symbol along with each point.
volcano_plot <- breast_cancer_cells_data2 %>%
  ggplot(aes(x = log_SKBR3, y = neglog_SKBR3, description = Gene_Symbol)) +
  geom_point(alpha = 0.7, color = "black")

# Classify each gene as UP, DOWN or NOT SIG from its log ratio and p-value.
# Keep the space in `< -4`: written as `<-4` it parses as an assignment.
breast_cancer_cells_data2 <- breast_cancer_cells_data2 %>%
  mutate(
    significance = ifelse(
      log_SKBR3 > 5 & neglog_SKBR3 > 1,
      "UP",
      ifelse(log_SKBR3 < -4 & neglog_SKBR3 > 2, "DOWN", "NOT SIG")
    )
  )

# Colours for the significance classes.
plain_cols3 <- c("darkgreen", "gray", "darkblue")

View plot

# Render the basic volcano plot.
print(volcano_plot)

Define Significance for Better Volcano Plot

# Classify each gene by its SKBR3 log ratio and -log10 p-value:
#   UP      : log ratio >  5 and -log10(p) > 1
#   DOWN    : log ratio < -4 and -log10(p) > 2
#   NOT SIG : everything else
# Keep the space in `< -4`: written as `<-4` it parses as an assignment.
breast_cancer_cells_data2 <- breast_cancer_cells_data2 %>%
  mutate(
    significance = ifelse(
      log_SKBR3 > 5 & neglog_SKBR3 > 1,
      "UP",
      ifelse(log_SKBR3 < -4 & neglog_SKBR3 > 2, "DOWN", "NOT SIG")
    )
  )

# Colours for the significance classes.
plain_cols3 <- c("darkgreen", "gray", "darkblue")

Better Volcano Plot

# Volcano plot coloured by significance class.
# The colours are matched to the classes by name. An unnamed vector is
# matched by position, so if one class were absent from the data the
# remaining classes would silently take the wrong colours.
better_volcano_plot <- breast_cancer_cells_data2 %>%
  ggplot(
    aes(
      x = log_SKBR3,
      y = neglog_SKBR3,
      description = Gene_Symbol,
      color = significance
    )
  ) +
  geom_point(alpha = 0.7) +
  scale_color_manual(
    values = setNames(plain_cols3, c("DOWN", "NOT SIG", "UP"))
  ) +
  xlim(-6, 6) +
  theme_bw() +
  theme(
    axis.text = element_text(colour = "black", size = 14),
    text = element_text(size = 14)
  ) +
  labs(
    x = "log ratio of SKBR3 gene compared to control",
    y = "-log(p-value)"
  )

View Better Volcano Plot

# Render the coloured volcano plot.
print(better_volcano_plot)

Interactive Volcano Plot (ggplotly)

# Convert the ggplot object into an interactive plotly widget.
better_volcano_plot %>%
  ggplotly()

Bar Plots

# Replicate columns, in the order the samples should be displayed.
sample_cols <- c(
  "MCF10A_1", "MCF10A_2",
  "MCF7_1", "MCF7_2",
  "MDA231_1", "MDA231_2",
  "MDA468_1", "MDA468_2",
  "SKBR3_1", "SKBR3_2"
)

# Long format: one row per gene and sample. The sample name goes to
# `variable`, the intensity to `value`; the p-value columns are dropped.
breast_cancer_cells_long <- breast_cancer_cells %>%
  pivot_longer(cols = all_of(sample_cols), names_to = "variable") %>%
  select(-c(pvalue_MCF7_vs_MCF10A:pvalue_SKBR3_vs_MCF10A))

# Ordered factor of the sample names. The levels come from sample_cols, so
# the last level is "SKBR3_2"; the misspelt level "SKBR_2" turned every
# SKBR3_2 row into NA.
breast_cancer_cells_long$order <- factor(
  as.character(breast_cancer_cells_long$variable),
  levels = sample_cols
)

# Bar chart of intensity per sample, one facet per gene.
# `data` is a long-format table with Gene_Symbol, variable and value columns.
# Returns the ggplot object.
plot_gene_bars <- function(data) {
  data %>%
    ggplot(aes(x = variable, y = value)) +
    geom_col(fill = "red") +
    facet_wrap(~Gene_Symbol) +
    theme_bw() +
    theme(axis.text = element_text(colour = "black", size = 10)) +
    theme(text = element_text(size = 14)) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(x = "sample", y = "relative intensity")
}

# Example genes picked as down-regulated.
examples_down <- breast_cancer_cells_long %>%
  filter(Gene_Symbol %in% c("APOA1", "HLA-A", "CDH3"))
example_plot_down <- plot_gene_bars(examples_down)

# Example genes picked as up-regulated.
examples_up <- breast_cancer_cells_long %>%
  filter(Gene_Symbol %in% c("GPNMB", "C17orf28", "CAPS"))
examples_plot_up <- plot_gene_bars(examples_up)

View Plots

# Render both bar charts. Each expression needs its own line: two bare
# names on one line do not parse.
example_plot_down
examples_plot_up

Upregulated Genes

# Symbol and description of the selected up-regulated genes.
# examples_up is in long format (one row per gene and sample), so
# distinct() keeps a single row per gene; without it head() would show the
# same gene repeated.
upregulated_genes <- examples_up %>%
  filter(Gene_Symbol %in% c("GPNMB", "C17orf28")) %>%
  select(Gene_Symbol, Description) %>%
  distinct()

head(upregulated_genes)

Downregulated Genes

# Symbol and description of the selected down-regulated genes.
# examples_down is in long format (one row per gene and sample), so
# distinct() keeps a single row per gene; without it head() would show the
# same gene repeated.
downregulated_genes <- examples_down %>%
  filter(Gene_Symbol %in% c("APOA1", "HLA-A", "CDH3")) %>%
  select(Gene_Symbol, Description) %>%
  distinct()

head(downregulated_genes)