# Load the condition annotations and the expression table from CSV files
# located in the working directory.
conditions <- read.csv(file = "conditions_annotation.csv")
expression <- read.csv(file = "SC_expression.csv")
# Show the column names and types of the annotation table.
str(conditions)
## 'data.frame': 92 obs. of 4 variables:
## $ ID : chr "AFIQCI" "AFIQBR" "AFIINC" "AFNAQI" ...
## $ primary : chr "wildtype" "wildtype" "itc1" "itc1" ...
## $ secondary : chr "wildtype 1" "wildtype 2" "itc1-1_dUTP" "itc1-1_dUTP" ...
## $ additional_information: chr "" "" "" "" ...
# Preview the first six rows of the expression table
head(expression, n = 6L)
## X IFFABF SAASCC IFFAFF IFFAFA INICIA INICIF
## 1 YAL008W 70.8049308 30.970186 118.3846533 110.7039503 74.10224 89.80222
## 2 YBR255W 36.3687858 16.860290 27.7264519 34.5279281 35.44020 56.97130
## 3 YGR164W 0.3513892 0.904239 0.4560272 0.2926096 0.00000 1.10356
## 4 YGR131W 7.3791739 5.858715 12.1303227 12.6797476 11.27643 18.48464
## 5 YNL003C 82.0493864 78.480411 67.2184048 64.1790302 75.71315 75.31799
## 6 YBR135W 45.0656694 21.004719 53.8112060 42.9160688 17.72010 14.76012
## SAABQI FFNAAA IFAAAR AFIINC IFFABN SAARRS FFNARN
## 1 7.3415361 112.09117 5.9736681 1.5999440 80.906634 25.248508 92.87835
## 2 15.4521855 26.85875 5.3840882 3.2055216 35.588256 18.951094 20.78055
## 3 0.7691133 18.08554 0.7119455 0.6140630 0.399868 3.747059 16.48786
## 4 2.3772593 22.25159 4.4051630 0.4675893 10.263280 12.300555 14.73175
## 5 20.1367847 43.32689 30.3244305 3.6505765 84.638736 120.082451 38.53671
## 6 13.4944425 21.76147 6.7746068 2.4280840 35.055098 17.715153 12.87809
## SAABQR FFNAAR FAFASB SAABBN SAASCF INICIQ SAABFA
## 1 14.3453666 97.81174 9.706003 11.5621808 53.7957089 87.287511 7.1145370
## 2 19.8577066 26.51142 10.002635 14.0147646 23.5684783 47.303048 12.5800091
## 3 0.4648961 21.62535 1.101011 0.9009491 0.8542478 1.249514 0.7067421
## 4 4.2172721 19.09184 1.095667 1.5015819 9.5719563 22.937516 0.9423228
## 5 20.5550508 48.86063 10.529089 15.2660828 126.0782168 95.498606 12.5800091
## 6 12.4857821 22.98259 12.103108 14.0147646 24.3789186 10.977877 9.3289955
## IFFAFQ SAABRQ IFFAFB INICIA.1 QCFQIA FFNAAC QNCSCN
## 1 123.6629036 10.232139 100.5829827 148.0641808 7.210451 103.392926 25.230781
## 2 32.2781138 14.944556 32.7070538 37.2228387 13.255577 27.390313 24.827382
## 3 0.5589284 3.961434 0.3516888 0.3308697 2.840481 3.524759 4.327372
## 4 13.1348169 4.505896 9.6128259 546.9275773 2.348859 18.431551 4.877462
## 5 67.9097979 105.325323 67.4070106 60.5491510 17.133926 47.143649 43.933831
## 6 44.9937344 13.236070 38.8029922 8.2717419 18.372084 27.684043 39.533113
## SAABQQ INICIC INICIB IAAICI SAABIC FAFAFF IFAAAA
## 1 11.3035156 119.7574980 64.4814982 13.098598 15.303059 3.420917 6.147074
## 2 14.4192283 21.0620018 44.9510444 19.570847 8.763290 3.890629 5.891871
## 3 0.7245843 0.3481323 0.9300216 2.157416 1.613143 2.488141 1.054101
## 4 2.8983373 438.1244514 22.9405330 2.465618 2.833900 1.650637 4.660237
## 5 15.5061048 80.9407508 93.6221753 27.121803 20.622071 5.895765 28.438540
## 6 9.7094301 16.5362824 12.2452845 38.525289 11.684387 4.342615 7.522954
## SAABIB SAARAA QCFFAB INICSR AFNCCR INICIR SAABFA.1
## 1 14.107651 17.562015 7.386431 84.979729 1.6045494 80.5725155 4.6628026
## 2 9.637325 23.270167 14.158209 63.216628 3.4161373 38.1860263 7.5517129
## 3 1.393348 4.713701 2.151285 7.254367 0.6314678 0.7637205 0.6081916
## 4 4.412269 11.197525 2.278454 14.508734 0.4934421 26.0619629 0.7602396
## 5 19.913269 125.539570 17.941502 42.489865 4.6790730 113.1261029 9.6297010
## 6 12.946527 23.309945 18.058074 7.254367 3.9544378 10.2147620 6.4873775
## SAARRR IFFABI SARICB INICSA QCAQFI FAFAFQ SAASAQ
## 1 32.3772163 86.3460797 1.4843492 40.752473 21.094260 3.799670 35.9634167
## 2 30.5309261 36.2690356 6.2972391 31.157837 1.362831 4.153840 17.6560149
## 3 0.8056539 0.4602669 0.1349408 9.891377 1.836860 2.662830 0.8228046
## 4 17.1369299 9.1132856 0.6297239 28.190424 2.133127 1.912952 5.7767738
## 5 123.0132806 76.4963670 4.5430082 126.708538 16.502111 6.383796 91.7255686
## 6 69.4037270 50.6293644 2.4739154 26.706718 10.961905 4.580155 16.7646433
## IAAICF INICIS AFIQBR FABINF FABINB IFFABQ IFFAFR
## 1 10.0163057 71.3208488 0.9131177 2.9034705 2.973727 75.5213649 78.871847
## 2 29.7359076 18.4637731 1.7245724 2.6832326 3.363441 33.7479669 37.446574
## 3 1.9823938 0.1206783 0.4639525 1.5802074 2.524501 0.4115606 1.170205
## 4 0.8346921 359.8625571 0.1663575 0.4312994 0.514499 8.4369917 8.191438
## 5 34.1180413 80.3717180 2.0868620 5.2324870 5.960893 74.9040241 75.712292
## 6 30.8836093 17.6190253 1.6136674 3.8082815 4.501866 41.3618375 37.446574
## FFNAAQ QCAQFQ IFFAFA.1 AFNCCA QNCSCS AFIQCI QNCSCQ
## 1 126.891141 34.945444 117.398963 3.244893 21.068795 1.5778249 25.953675
## 2 30.376063 6.604097 28.296160 7.346928 23.305008 2.7106923 23.344930
## 3 4.090045 1.480739 0.301023 1.390668 2.353030 0.5432304 2.842864
## 4 19.253137 1.332665 10.686316 1.128278 4.514146 0.4012807 4.715810
## 5 45.589034 17.857714 59.301527 9.953338 41.645290 3.7835040 48.897260
## 6 31.473392 12.823201 40.638102 7.361505 54.478481 2.5196062 40.034213
## SAABFI IFFABC IFFAFC QCAQFS INICII IAAICQ SAABFR
## 1 15.434716 77.3586657 106.1655599 44.996731 128.2320386 15.528619 9.8278590
## 2 13.780115 37.1147756 39.8865001 51.122792 47.1169594 34.503537 7.9692954
## 3 1.728688 0.1738397 0.3968806 1.192684 0.1847724 1.419064 0.5632011
## 4 4.667458 11.5603399 7.3422911 2.168517 443.0841912 2.108324 1.3235226
## 5 22.695207 79.6185818 71.4385076 31.118221 42.6824221 39.490535 12.4185840
## 6 18.620442 54.5856652 36.9098956 21.929130 8.4995299 28.056930 12.4185840
## SAABFN QNCSCI IAAICS SAABFS QCAQFN SICIBA SAABQS
## 1 14.5538531 24.428979 11.445349 10.399637 43.5405506 46.13813 11.8666284
## 2 20.0536382 21.706792 21.212047 13.187547 59.0769248 124.16212 17.2840022
## 3 0.7482701 2.969659 1.678651 1.815810 2.4603867 13.46222 0.5159404
## 4 4.1528990 4.065604 2.619713 3.741668 0.4976063 35.45683 2.8376720
## 5 19.2305412 45.287296 25.281504 21.514593 35.1088884 63.61373 22.5723909
## 6 15.5640177 45.393356 35.149938 15.681992 17.3332859 22.75305 12.2535837
## FAFAQQ IFFABS SAABIF FFNARI INICIN FFNAAA.1 SAABIQ
## 1 10.106824 88.473574 12.3602202 153.113996 75.6719123 95.29289 12.953973
## 2 9.793654 36.050109 21.6454588 22.203842 33.9282329 20.87077 21.543401
## 3 1.610132 0.000000 0.7838188 5.088381 0.7355714 15.83124 1.222073
## 4 1.357066 8.904808 3.0749816 20.816102 22.3429826 22.34700 2.199731
## 5 12.805140 83.733918 22.0072214 32.380603 119.5303597 39.75627 21.124404
## 6 14.326699 47.971061 10.0690575 20.816102 12.5966610 19.49635 10.684409
## AFNAQI SAANNN IFFABB FFNARS SAABQF IFFAFS IFAAAA.1
## 1 1.638143 34.0660510 81.346495 127.981129 14.649997 101.58666703 6.5740958
## 2 3.547613 28.9971045 35.137835 31.181031 13.574768 30.90338738 6.4596733
## 3 0.712236 0.9045595 0.000000 2.586445 1.209633 0.08218986 0.9777927
## 4 0.491782 11.0253852 7.059656 21.936885 2.016055 9.36964405 4.6913247
## 5 4.232717 131.2123246 84.715876 41.766297 17.539675 66.16283735 26.7124654
## 6 2.482651 59.9057309 47.010893 25.002301 10.953897 43.06748667 7.0525902
## IFFAFI SARIAI
## 1 110.2845047 1.48926863
## 2 29.0860232 5.11936093
## 3 0.1009931 0.04653964
## 4 12.0181832 0.93079290
## 5 75.2398865 3.90933016
## 6 42.9220829 2.23390295
# Subset the annotation table to the rows whose `primary` label matches "itc1"
myFilter <- conditions[grepl(pattern = "itc1", x = conditions$primary), ]
print(myFilter)
## ID primary secondary additional_information
## 3 AFIINC itc1 itc1-1_dUTP
## 4 AFNAQI itc1 itc1-1_dUTP
Select only the columns corresponding to our filtered conditions.
# Columns to keep: the gene names column (X) followed by one column per
# sample ID retained in myFilter
selectedColumns <- c("X", myFilter$ID)
expressionFiltered <- select(expression, all_of(selectedColumns))
# Inspect the structure of the reduced table
str(expressionFiltered)
## 'data.frame': 6071 obs. of 3 variables:
## $ X : chr "YAL008W" "YBR255W" "YGR164W" "YGR131W" ...
## $ AFIINC: num 1.6 3.206 0.614 0.468 3.651 ...
## $ AFNAQI: num 1.638 3.548 0.712 0.492 4.233 ...
Convert to long format, excluding gene names column.
# Reshape to long format: every column except X is stacked, with the former
# column name stored in `treatment` and the cell value in `expressionValue`
tidyExpression <- pivot_longer(
  data = expressionFiltered,
  cols = -X,
  names_to = "treatment",
  values_to = "expressionValue"
)
# Preview the first rows of the long table
head(tidyExpression)
## # A tibble: 6 × 3
## X treatment expressionValue
## <chr> <chr> <dbl>
## 1 YAL008W AFIINC 1.60
## 2 YAL008W AFNAQI 1.64
## 3 YBR255W AFIINC 3.21
## 4 YBR255W AFNAQI 3.55
## 5 YGR164W AFIINC 0.614
## 6 YGR164W AFNAQI 0.712
Group by treatment and calculate summary statistics.
# Per-treatment mean, median, and row count of the expression values.
# NA values are removed before computing the mean and median, and the
# grouping is dropped from the result.
summaryStats <- summarise(
  group_by(tidyExpression, treatment),
  mean = mean(expressionValue, na.rm = TRUE),
  median = median(expressionValue, na.rm = TRUE),
  n = n(),
  .groups = "drop"
)
print(summaryStats)
## # A tibble: 2 × 4
## treatment mean median n
## <chr> <dbl> <dbl> <int>
## 1 AFIINC 165. 3.62 6071
## 2 AFNAQI 165. 3.97 6071
Create enhanced violin plot with log transformation.
# Violin plot of log(expressionValue + 1) per treatment, with a narrow boxplot
# drawn on top of each violin.
violinPlot <- ggplot(
  data = tidyExpression,
  mapping = aes(x = treatment, y = log(expressionValue + 1))
) +
  # Density outline, filled per treatment and left untrimmed at the tails
  geom_violin(mapping = aes(fill = treatment), alpha = 0.7, trim = FALSE) +
  # Quartile box and outlier points layered over the violin
  geom_boxplot(
    width = 0.2,
    alpha = 0.9,
    outlier.size = 1.5,
    outlier.alpha = 0.9
  ) +
  # Title, subtitle, and axis labels
  labs(
    title = "Distribution of Gene Expression Values by Treatment",
    subtitle = "ITC1 conditions - Log-transformed expression values",
    x = "Treatment Condition",
    y = "Log(Expression Value +1)"
  ) +
  # Start from the minimal theme, then override text sizes below
  theme_minimal() +
  theme(
    # Axis titles: large and bold
    axis.title.x = element_text(size = 16, face = "bold"),
    axis.title.y = element_text(size = 16, face = "bold"),
    # Tick labels: large, with the x labels rotated 45 degrees
    axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 14),
    # Plot title and subtitle
    plot.title = element_text(size = 18, face = "bold"),
    plot.subtitle = element_text(size = 14),
    # The fill legend is hidden
    legend.position = "none"
  ) +
  # Discrete viridis "plasma" palette for the violin fills
  scale_fill_viridis_d(option = "plasma")
print(violinPlot)
I chose “itc1” as the condition instead of wildtype, used ‘all_of()’ to avoid conflicts when selecting columns, and added a log(expressionValue + 1) transformation to handle zero values. A boxplot overlay, outlier points, and enhanced theming were applied to the violin plot.