# This tutorial focuses on the use of dplyr for data manipulation.
# Step 1: Read in the files from the yeast expression experiment.
# Load the packages the later steps rely on: dplyr (%>%, select, group_by,
# summarise), tidyr (pivot_longer) and ggplot2 (ggplot, geom_violin).
# Without these calls the script fails in a fresh R session.
library(dplyr)
library(tidyr)
library(ggplot2)

# Annotation table; later steps use its `primary` and `ID` columns.
myconditions <- read.csv("conditions_annotation.csv")
# Expression table; later steps select its columns by the annotation IDs.
myexpression <- read.csv("SC_expression.csv")
# Step 2: Limit your search to one condition (pick a new condition for your own script).
# Flag the annotation rows whose `primary` field matches "wildtype", then keep
# only those rows (all columns are retained).
is_selected <- grepl(pattern = "wildtype", x = myconditions$primary)
myfilter <- myconditions[is_selected, ]
# Step 3: Select only the expression columns named in the ID annotations (column 1) of the filtered list.
# Keep only the expression columns whose names appear in the filtered
# annotation IDs.
# all_of() marks the IDs as an external vector of column names, so they can
# never be confused with columns of `myexpression`, and it errors if any ID
# has no matching column. as.character() guards against `ID` having been read
# in as a factor.
myexpression2 <- myexpression %>%
  select(all_of(as.character(myfilter$ID)))
# Step 4: Make the data tidy (one observation per row) for use with ggplot2/dplyr.
# Reshape the wide table into long format: every column is stacked into a
# `name` column (the former column name) and a `value` column (the cell value).
# The two output names are pivot_longer()'s defaults, spelled out here because
# the summary and plotting steps refer to them.
tidyExpression <- pivot_longer(
  myexpression2,
  cols = everything(),
  names_to = "name",
  values_to = "value"
)
# Step 5: Create a summary of expression-value statistics in an easy-to-read tibble (a formatted data frame).
# Group the long table by `name` (the former column names of the wide table).
by_treatment <- tidyExpression %>%
  group_by(name)

# Print one row per group with the mean, median and number of `value` entries.
# An explicit summarise() replaces the superseded summarise_all() and produces
# the same `mean`, `median` and `n` columns.
by_treatment %>%
  summarise(
    mean = mean(value),
    median = median(value),
    n = length(value)
  )
## # A tibble: 4 × 4
## name mean median n
## <chr> <dbl> <dbl> <int>
## 1 AFIQBR 165. 2.13 6071
## 2 AFIQCI 165. 3.31 6071
## 3 QCAQFI 165. 5.36 6071
## 4 QCAQFQ 165. 7.37 6071
# Step 6: Draw a violin plot of the expression data.
# Violin plot of the log-transformed values, one violin per `name`.
# Values whose log is non-finite (e.g. log(0) is -Inf) are dropped by ggplot2
# with a warning rather than plotted.
ggplot(
  data = tidyExpression,
  mapping = aes(x = name, y = log(value))
) +
  geom_violin()
## Warning: Removed 1098 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
# Step 7: Pick a new condition and regenerate the tibble and plots for
# this condition.