#install.packages("tidyverse")
#install.packages("ggplot2")
library(tidyverse)
library(readxl)
library(ggplot2)
# Load Bhavyaa's Excel sheet.
# First attempt: read it with the defaults and inspect the result.
df <- read_excel(path = "Transcripts per 1000 cells for BT.xlsx")
# View(df)
# The headers and the first few rows come in wrong, so re-import the
# sheet while skipping the leading rows.
df <- read_excel(path = "Transcripts per 1000 cells for BT.xlsx", skip = 4)
# View(df)
# It still looks wrong: four was not the right number of rows to skip.
# Skipping five rows is the import that is kept.
df <- read_excel(path = "Transcripts per 1000 cells for BT.xlsx", skip = 5)
# View(df)
# Columns 14 and 15 look like empty artifacts from Excel processing;
# drop them by negative index.
df <- df[, -(14:15)]
# Supply 13 column names: the gene, nine treatment columns (trt_1,
# trt_2 and trt_3, each repeated three times), two log2 columns, and
# the gene family.
names(df) <- c(
  "gene",
  paste0("trt_", rep(1:3, each = 3)),
  "log2_tcpl", "log2_thr", "gene family"
)
#?rep
#paste0
# What does paste0("trt_", rep(1:3, each = 3)) do?
# Using it in the colnames() call above is the same as writing:
# colnames(df) <- c('gene', 'trt_1','trt_1','trt_1','trt_2','trt_2'...)
# paste0() is a function that combines character values together,
# with no space or separator between them by default. So the first
# part of each character value is the string "trt_".
# The second part of each character value is a number generated by the
# function rep(). This function replicates the elements of a list or a
# vector. Here, we are telling it to replicate each number from 1
# through 3 (denoted by the colon syntax, 1:3, which tells R we want
# all the whole numbers from 1 through 3). We are also using the
# argument each, which tells rep() how many times to repeat each of
# our numbers before moving on to the next one. In sum, we are telling
# rep() to replicate the numbers 1:3 three times each, which gives
# 1 1 1 2 2 2 3 3 3.
# Now, back to our paste0() function: we are combining the string
# "trt_" with each number output by our rep() function. Take note of
# how the rep() function is nested inside the paste0() function. You
# can nest any number of functions inside others, but large nests get
# confusing quickly and can/will crash your computer :-)
# Re-import the original sheet, skipping the five rows above the header.
df <- read_excel("Transcripts per 1000 cells for BT.xlsx", skip = 5)
## New names:
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
# Drop columns 14 and 15 (empty artifacts from Excel processing).
df <- df[, -c(14:15)]
# Treatment label for each of the nine measurement columns (columns
# 2:10), in column order: trt_1 x3, trt_2 x3, trt_3 x3.
groups <- paste0("trt_", rep(1:3, each = 3))
# Create a long data frame with one row per gene per measurement column.
# t() is the transpose function: transposing puts the genes in columns,
# so c() (which reads a matrix column by column) returns the nine values
# of gene 1, then the nine values of gene 2, and so on.
# Gene and treatment are repeated explicitly to follow that same order.
# Previously the nine labels in `groups` were silently recycled against
# values ordered treatment-by-treatment, which put the wrong treatment
# on most rows.
# NOTE(review): assumes the sheet's header row names the first column
# `Gene` -- confirm against the file.
df_long <- data.frame(
  Gene = rep(df$Gene, each = length(groups)),
  Value = c(t(df[, 2:10])),
  treatment = rep(groups, times = nrow(df))
)
# Custom ggplot theme settings, stored in a list so they can be added
# to a plot with `+` (used by the plot below).
gghisto <- list(
  theme(
    plot.title = element_text(face = "bold", size = 17),
    axis.title = element_text(size = 17),
    axis.text.y = element_text(size = 14, face = "bold"),
    axis.text.x = element_text(angle = 80, color = "Navyblue", size = 6)
  )
)
# One colour per treatment level.
color_palette <- c("pink", "purple", "lightblue")
# Scatter plot of the first 99 rows of df_long: Value against Gene,
# filled by treatment. The point outline is fixed to black in
# geom_point(), which overrides the mapped `color` aesthetic.
ggplot(
  data = df_long[1:99, ],
  mapping = aes(x = Gene, y = Value, color = treatment, fill = treatment)
) +
  geom_point(shape = 21, size = 4, alpha = 0.7, color = "black") +
  scale_fill_manual(values = color_palette) +
  scale_color_manual(values = color_palette) +
  labs(title = "Scatter Plot by Treatment", x = "Gene", y = "Value") +
  gghisto