Loading the required libraries
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("tidyr")
library("tibble")
library("ggplot2")
Reading in the conditions_annotation, SC_expression, and mergedLabels CSV files
# Load the three input tables from the working directory.
# The first line of each file is treated as the column header.
expression <- read.csv(file = "SC_expression.csv", header = TRUE)
conditions <- read.csv(file = "conditions_annotation.csv", header = TRUE)
# read.csv() defaults to header = TRUE, so no explicit flag is given here.
mergedLabels <- read.csv(file = "mergedLabels.csv")
Filtering the condition annotations for rows whose `secondary` column contains “galactose”
# Keep only the annotation rows whose `secondary` field contains the
# substring "galactose" (grepl() returns FALSE for missing values).
galacdata <- conditions[
  grepl(pattern = "galactose", x = conditions$secondary),
]
Merging the expression data with the labels, matching column `X` of the expression table to column `gene` of the labels
# Inner-join the expression table to the label table: rows are matched where
# expression$X equals mergedLabels$gene; unmatched rows are dropped.
expressionlabels <- merge(
  x = expression,
  y = mergedLabels,
  by.x = "X",
  by.y = "gene"
)
Selecting the gene identifier column (`X`) and the expression columns for the three galactose conditions (SAASAQ, SAASCC, SAASCF)
# Reduce the expression table to the key column X plus the three
# hard-coded condition columns used in the rest of the analysis.
galacexpression <- select(expression, X, SAASAQ, SAASCC, SAASCF)
Using pivot_longer() to reshape the data to long format: the treatment column names are collected into a new column named “treatment”, and their values into a new column named “count”
# Reshape wide -> long: columns 2 to 4 (the three condition columns that
# follow X) are stacked, with the former column name stored in `treatment`
# and the cell value stored in `count`.
pivotgalac <- pivot_longer(
  galacexpression,
  cols = 2:4,
  names_to = "treatment",
  values_to = "count"
)
Creating a tibble with the mean count, median count, and number of observations (n) for each treatment
# Per-treatment summary of the long-format counts: mean, median, and the
# number of rows that contributed to each group.
# n() counts the rows of the current group. The earlier version used
# nrow(galacexpression), which repeats the wide table's row count for every
# group and would be wrong as soon as rows are filtered or groups differ in
# size.
# NOTE(review): the object name `tibble` shadows the tibble() constructor's
# name; it is kept unchanged in case later code refers to it.
tibble <- pivotgalac %>%
  group_by(treatment) %>%
  summarise(
    mean = mean(count),
    median = median(count),
    n = n()
  ) %>%
  as_tibble()

# Print the summary table.
tibble
## # A tibble: 3 × 4
## treatment mean median n
## <chr> <dbl> <dbl> <int>
## 1 SAASAQ 165. 32.5 6071
## 2 SAASCC 165. 31.1 6071
## 3 SAASCF 165. 36.7 6071
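Creating a violin plot of the counts for each condition tested with galactose. Counts of 328 or higher are filtered out before plotting. The mean (pink) and median (blue) points are computed from the filtered data, so they differ from the values in the summary table above.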
# Keep only rows whose count is strictly below the hard-coded cutoff of 328.
filterpivotgalac <- filter(pivotgalac, count < 328)

# One violin per treatment. The two stat_summary() layers overlay a point at
# the mean (pink) and at the median (sky blue) of the filtered counts.
ggplot(data = filterpivotgalac, mapping = aes(x = treatment, y = count)) +
  geom_violin() +
  stat_summary(fun = mean, geom = "point", color = "pink", size = 2.5) +
  stat_summary(fun = median, geom = "point", color = "skyblue", size = 2.5)