library(tidyverse)
library(ggsignif)
library(RColorBrewer)
library(readxl)
Cell viability in R
A quick and easy introduction to biological data analysis in R
About the data
The quantitative data is obtained from two independent cell viability assay experiments on a cell line with two compounds i.e, Dexamethasone and Vitamin B3. The final datasets used for the analysis can be accessed from this link cell viability data
Installing the required packages
Packages can be installed using the the command install.packages("package_name")
Following packages need to be installed:
install.packages("tidyverse")
install.packages("readxl")
install.packages("RColorBrewer")
install.packages("ggsignif")
Calling the package to the working environment
Importing the data from Excel
Setting working directory
Before you can import the data, the working directory needs to be set. You can set the working directory by using the following command:
setwd("complete_path_of_folder")
This should be the folder/directory in which the Excel file is stored
Reading the Excel file
<- read_excel("Cell_viability_data.xlsx",
dex_mtt sheet = "Dexamethasone")
<- read_excel("Cell_viability_data.xlsx",
vitb3_mtt sheet = "VitaminB3")
Data cleaning
|>
is the native pipe operator which when used transfers whatever is on the left of the pipe as the first argument in the function that is provided immediately after the pipe
<- dex_mtt |>
dex_mtt_clean #sets the column names according to the provided vector of strings
setNames(c("conc", "v1", "v2", "v3")) |>
#changing the datatype from character to factor so that the x axis of the plot is according to the order of concentrations in 'conc' column
mutate(conc = fct(conc)) |>
#creating a long-form table with all the numerical values in one column
pivot_longer(cols = 2:4, values_to = "viability")
#displays the first 6 rows
head(dex_mtt_clean)
# A tibble: 6 × 3
conc name viability
<fct> <chr> <dbl>
1 Control v1 100
2 Control v2 100
3 Control v3 100
4 50 v1 75.8
5 50 v2 91.5
6 50 v3 98.1
<- vitb3_mtt |>
vitb3_mtt_clean setNames(c("conc", "v1", "v2", "v3")) |>
mutate(conc = fct(conc)) |>
pivot_longer(cols = 2:4, values_to = "viability")
head(vitb3_mtt_clean)
# A tibble: 6 × 3
conc name viability
<fct> <chr> <dbl>
1 Control v1 100
2 Control v2 100
3 Control v3 100
4 1 v1 136.
5 1 v2 122.
6 1 v3 145.
Plotting the data using ggplot2
package
ggplot works in layers. So, after the ggplot() function, all the other functions following will be added using a ‘+’ symbol instead of the pipe operator |>
<- dex_mtt_clean |>
dex_mtt_clean_plot ggplot(aes(conc, viability, group = 1))+
theme_classic()+
stat_summary(geom = "errorbar", width = 0.2)+
stat_summary(geom = "line")+
stat_summary(geom = "point", color = brewer.pal(n = 11, name = "BrBG"), size = 2)+
theme(legend.position = "none",
plot.margin = margin(t = 50, b = 30, r = 30, l = 30, unit = "pt"),
axis.title = element_text(size = 15, color = "black"),
axis.title.x = element_text(vjust = -3),
axis.text = element_text(size = 10, colour = "black"))+
scale_x_discrete(name = "Dexamethasone Concentration (μM)")+
scale_y_continuous(name = "Percentage viability", limits = c(0, 170), breaks = seq(0,200, 20), expand = c(0,0))
<- vitb3_mtt_clean |>
vitb3_mtt_clean_plot ggplot(aes(conc, viability, group = 1))+
theme_classic()+
stat_summary(geom = "errorbar", width = 0.2)+
stat_summary(geom = "line")+
stat_summary(geom = "point", color = brewer.pal(n = 11, name = "Set3"))+
theme(legend.position = "none",
plot.margin = margin(t = 30, b = 30, r = 30, l = 30, unit = "pt"),
axis.title = element_text(size = 15, color = "black"),
axis.title.x = element_text(vjust = -3),
axis.text = element_text(size = 10, colour = "black"))+
scale_x_discrete(name = "Vitamin-B3 Concentration (mM)")+
scale_y_continuous(name = "Percentage viability", limits = c(0,200), breaks = seq(0, 200, 20), expand = c(0,0))
dex_mtt_clean_plot
vitb3_mtt_clean_plot
Performing a statistical analysis
Here, we will perform a one way Analysis of Variance or ANOVA. That will give us an overall significance of the experiment. Following that, we will go for a post-hoc analysis for multiple comparisons and get the exact significance or P-values between particular groups.
<- aov(viability ~ conc, data = dex_mtt_clean)
anova_result_dex
# Display the ANOVA results
summary(anova_result_dex)
Df Sum Sq Mean Sq F value Pr(>F)
conc 10 18234 1823.4 25.06 9.51e-10 ***
Residuals 22 1601 72.8
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Display the posthoc test results
TukeyHSD(anova_result_dex)
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = viability ~ conc, data = dex_mtt_clean)
$conc
diff lwr upr p adj
50-Control -11.5197167 -36.41921 13.379778 0.8424058
100-Control -9.9521447 -34.85164 14.947350 0.9275916
150-Control -17.4594331 -42.35893 7.440061 0.3518195
200-Control -43.5474317 -68.44693 -18.647937 0.0001178
250-Control -47.7390016 -72.63850 -22.839507 0.0000310
300-Control -53.8173984 -78.71689 -28.917904 0.0000048
350-Control -57.6064846 -82.50598 -32.706990 0.0000016
400-Control -60.0397377 -84.93923 -35.140243 0.0000008
450-Control -65.5349008 -90.43440 -40.635406 0.0000002
500-Control -65.1814637 -90.08096 -40.281969 0.0000002
100-50 1.5675720 -23.33192 26.467067 1.0000000
150-50 -5.9397164 -30.83921 18.959778 0.9981423
200-50 -32.0277150 -56.92721 -7.128220 0.0051882
250-50 -36.2192849 -61.11878 -11.319790 0.0012992
300-50 -42.2976817 -67.19718 -17.398187 0.0001764
350-50 -46.0867678 -70.98626 -21.187273 0.0000523
400-50 -48.5200210 -73.41952 -23.620526 0.0000243
450-50 -54.0151841 -78.91468 -29.115690 0.0000045
500-50 -53.6617470 -78.56124 -28.762252 0.0000050
150-100 -7.5072884 -32.40678 17.392206 0.9886097
200-100 -33.5952870 -58.49478 -8.695792 0.0030950
250-100 -37.7868569 -62.68635 -12.887362 0.0007738
300-100 -43.8652537 -68.76475 -18.965759 0.0001063
350-100 -47.6543399 -72.55383 -22.754845 0.0000319
400-100 -50.0875930 -74.98709 -25.188098 0.0000149
450-100 -55.5827561 -80.48225 -30.683262 0.0000028
500-100 -55.2293190 -80.12881 -30.329824 0.0000032
200-150 -26.0879986 -50.98749 -1.188504 0.0348179
250-150 -30.2795684 -55.17906 -5.380074 0.0091891
300-150 -36.3579653 -61.25746 -11.458471 0.0012409
350-150 -40.1470514 -65.04655 -15.247557 0.0003559
400-150 -42.5803045 -67.47980 -17.680810 0.0001610
450-150 -48.0754677 -72.97496 -23.175973 0.0000279
500-150 -47.7220305 -72.62153 -22.822536 0.0000312
250-200 -4.1915698 -29.09106 20.707925 0.9999086
300-200 -10.2699667 -35.16946 14.629528 0.9134325
350-200 -14.0590528 -38.95855 10.840442 0.6395089
400-200 -16.4923060 -41.39180 8.407189 0.4272647
450-200 -21.9874691 -46.88696 2.912025 0.1158720
500-200 -21.6340320 -46.53353 3.265463 0.1276082
300-250 -6.0783968 -30.97789 18.821098 0.9977588
350-250 -9.8674830 -34.76698 15.032012 0.9310942
400-250 -12.3007361 -37.20023 12.598758 0.7866505
450-250 -17.7958992 -42.69539 7.103595 0.3274870
500-250 -17.4424621 -42.34196 7.457032 0.3530745
350-300 -3.7890862 -28.68858 21.110408 0.9999636
400-300 -6.2223393 -31.12183 18.677155 0.9972941
450-300 -11.7175024 -36.61700 13.181992 0.8290376
500-300 -11.3640653 -36.26356 13.535429 0.8525396
400-350 -2.4332531 -27.33275 22.466241 0.9999994
450-350 -7.9284163 -32.82791 16.971078 0.9831788
500-350 -7.5749791 -32.47447 17.324515 0.9878440
450-400 -5.4951631 -30.39466 19.404331 0.9990251
500-400 -5.1417260 -30.04122 19.757769 0.9994458
500-450 0.3534371 -24.54606 25.252932 1.0000000
<- aov(viability ~ conc, data = vitb3_mtt_clean)
anova_result_vitb3
# Display the ANOVA results
summary(anova_result_vitb3)
Df Sum Sq Mean Sq F value Pr(>F)
conc 10 17555 1755.5 26.71 5.07e-10 ***
Residuals 22 1446 65.7
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Display the posthoc test results
TukeyHSD(anova_result_vitb3)
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = viability ~ conc, data = vitb3_mtt_clean)
$conc
diff lwr upr p adj
1-Control 34.4826215 10.819178 58.146065 0.0012717
2-Control 38.1072101 14.443767 61.770653 0.0003617
3-Control 34.7061390 11.042696 58.369582 0.0011765
4-Control 28.4213144 4.757871 52.084758 0.0103742
5-Control -7.4857574 -31.149201 16.177686 0.9839342
6-Control -14.1919187 -37.855362 9.471525 0.5615950
7-Control -17.0463646 -40.709808 6.617079 0.3176164
8-Control -17.4546094 -41.118053 6.208834 0.2887082
9-Control -15.4287320 -39.092175 8.234711 0.4485802
10-Control -17.4714574 -41.134901 6.191986 0.2875542
2-1 3.6245886 -20.038855 27.288032 0.9999613
3-1 0.2235175 -23.439926 23.886961 1.0000000
4-1 -6.0613071 -29.724750 17.602136 0.9967055
5-1 -41.9683789 -65.631822 -18.304936 0.0000967
6-1 -48.6745402 -72.337984 -25.011097 0.0000106
7-1 -51.5289861 -75.192429 -27.865543 0.0000043
8-1 -51.9372309 -75.600674 -28.273788 0.0000038
9-1 -49.9113535 -73.574797 -26.247910 0.0000071
10-1 -51.9540789 -75.617522 -28.290636 0.0000037
3-2 -3.4010710 -27.064514 20.262372 0.9999785
4-2 -9.6858957 -33.349339 13.977548 0.9170619
5-2 -45.5929675 -69.256411 -21.929524 0.0000288
6-2 -52.2991288 -75.962572 -28.635685 0.0000034
7-2 -55.1535747 -78.817018 -31.490131 0.0000014
8-2 -55.5618194 -79.225263 -31.898376 0.0000012
9-2 -53.5359421 -77.199385 -29.872499 0.0000023
10-2 -55.5786674 -79.242111 -31.915224 0.0000012
4-3 -6.2848246 -29.948268 17.378619 0.9956209
5-3 -42.1918965 -65.855340 -18.528453 0.0000896
6-3 -48.8980578 -72.561501 -25.234614 0.0000098
7-3 -51.7525036 -75.415947 -28.089060 0.0000040
8-3 -52.1607484 -75.824192 -28.497305 0.0000035
9-3 -50.1348710 -73.798314 -26.471428 0.0000066
10-3 -52.1775964 -75.841040 -28.514153 0.0000035
5-4 -35.9070718 -59.570515 -12.243629 0.0007748
6-4 -42.6132331 -66.276676 -18.949790 0.0000778
7-4 -45.4676790 -69.131122 -21.804236 0.0000300
8-4 -45.8759238 -69.539367 -22.212480 0.0000263
9-4 -43.8500464 -67.513490 -20.186603 0.0000514
10-4 -45.8927718 -69.556215 -22.229328 0.0000261
6-5 -6.7061613 -30.369605 16.957282 0.9927989
7-5 -9.5606072 -33.224050 14.102836 0.9229659
8-5 -9.9688519 -33.632295 13.694591 0.9027117
9-5 -7.9429746 -31.606418 15.720469 0.9758110
10-5 -9.9856999 -33.649143 13.677743 0.9018128
7-6 -2.8544459 -26.517889 20.808997 0.9999959
8-6 -3.2626906 -26.926134 20.400753 0.9999854
9-6 -1.2368133 -24.900257 22.426630 1.0000000
10-6 -3.2795387 -26.942982 20.383905 0.9999847
8-7 -0.4082448 -24.071688 23.255199 1.0000000
9-7 1.6176326 -22.045811 25.281076 1.0000000
10-7 -0.4250928 -24.088536 23.238351 1.0000000
9-8 2.0258774 -21.637566 25.689321 0.9999998
10-8 -0.0168480 -23.680291 23.646595 1.0000000
10-9 -2.0427254 -25.706169 21.620718 0.9999998
Drawing significance bars on the plots
To add significance bars, we will use another layer with ggplot called geom_signif()
which come from the ggsignif
package. We will draw one significance bar on each of the graph.
For dexamethasone graph, we will draw the comparison of Control vs 200 μM which is p<0.001 (***)
For vitamin-b3 graph, we will draw the comparison of Control vs 1 mM which is p<0.01 (**)
#Dexamethasone
+
dex_mtt_clean_plotgeom_signif(comparisons = list(c("Control", "200")),
map_signif_level = TRUE,
annotations = "P<0.001",
textsize = 5,
margin_top = 0.4,
tip_length = c(0.4,0.2),
vjust = -1)
#Vitamin-b3
+
vitb3_mtt_clean_plotgeom_signif(
comparisons = list(c("Control", "1")),
map_signif_level = TRUE,
annotations = "P<0.01",
textsize = 5,
y_position = 170,
tip_length = c(0.23,0.7),
vjust = -1)