This project contains a workflow for examining the effect of ocean acidification on gene expression of snow crabs. It runs using paired end RNA-Seq data prepared and provided by Laura Spencer. 63 juvenile crabs were exposed to the following pH treatments:
On 7/20/2021, after exposure, all crabs were sacrificed by puncturing the carapace through the cardiac region. Crabs were preserved overnight at 4C prior to being transferred to -80C. mRNA samples were then extracted. Each of the 63 mRNA samples were run in both lanes of NovaSeq 6000, for a grand total of 126 paired-end RNA-Seq data sets. Sequence data received January 3, 2022.
#there are 63 files within /5010
ls /home/shared/8TB_HDD_01/snow_crab/5010
cd ../data
curl -O https://owl.fish.washington.edu/halfshell/genomic-databank/cbai_genome_v1.0.fasta
#index "cbai_genome_v1.0.fasta" and rename it as "cbai_genome_v1.0.index"
/home/shared/kallisto/kallisto \
index -i \
../data/cbai_genome_v1.0.index \
../data/cbai_genome_v1.0.fasta
#list file names
ls /home/shared/8TB_HDD_01/snow_crab/5010/*fastq.gz
#make a new directory in output
mkdir ../output/kallisto_paired
#find specific files and create abundance estimates
find /home/shared/8TB_HDD_01/snow_crab/5010/*fastq.gz \
| xargs basename -s _L003_R1_001.fastq.gz | xargs -I{} \
/home/shared/kallisto/kallisto \
quant -i ../data/cbai_genome_v1.0.index \
-o ../output/kallisto_paired/{} \
-t 40 \
/home/shared/8TB_HDD_01/snow_crab/5010/{}_L003_R1_001.fastq.gz \
/home/shared/8TB_HDD_01/snow_crab/5010/{}_L003_R2_001.fastq.gz
# delete empty files
rm ../output/kallisto_paired/*R2_001.fastq.gz
Run abundance_estimates_to_matrix.pl script from the Trinity RNA-seq assembly software package.
ls ~/courtney_RNAseq_crabs/output/kallisto_paired
#use these folder names for perl input
#Sample name input for perl
perl /home/shared/trinityrnaseq-v2.12.0/util/abundance_estimates_to_matrix.pl \
--est_method kallisto \
--gene_trans_map none \
--out_prefix ../output/kallisto_paired \
--name_sample_by_basedir \
../output/kallisto_paired/5010_1_S1/abundance.tsv \
../output/kallisto_paired/5010_2_S2/abundance.tsv \
../output/kallisto_paired/5010_3_S3/abundance.tsv \
../output/kallisto_paired/5010_4_S4/abundance.tsv \
../output/kallisto_paired/5010_5_S5/abundance.tsv \
../output/kallisto_paired/5010_6_S6/abundance.tsv \
../output/kallisto_paired/5010_7_S7/abundance.tsv \
../output/kallisto_paired/5010_8_S8/abundance.tsv \
../output/kallisto_paired/5010_9_S9/abundance.tsv \
../output/kallisto_paired/5010_10_S10/abundance.tsv \
../output/kallisto_paired/5010_11_S11/abundance.tsv \
../output/kallisto_paired/5010_12_S12/abundance.tsv \
../output/kallisto_paired/5010_13_S13/abundance.tsv \
../output/kallisto_paired/5010_14_S14/abundance.tsv \
../output/kallisto_paired/5010_15_S15/abundance.tsv \
../output/kallisto_paired/5010_16_S16/abundance.tsv \
../output/kallisto_paired/5010_17_S17/abundance.tsv \
../output/kallisto_paired/5010_18_S18/abundance.tsv \
../output/kallisto_paired/5010_19_S19/abundance.tsv \
../output/kallisto_paired/5010_20_S20/abundance.tsv \
../output/kallisto_paired/5010_21_S21/abundance.tsv \
../output/kallisto_paired/5010_22_S22/abundance.tsv \
../output/kallisto_paired/5010_23_S23/abundance.tsv \
../output/kallisto_paired/5010_24_S24/abundance.tsv \
../output/kallisto_paired/5010_25_S25/abundance.tsv \
../output/kallisto_paired/5010_26_S26/abundance.tsv \
../output/kallisto_paired/5010_27_S27/abundance.tsv \
../output/kallisto_paired/5010_28_S28/abundance.tsv \
../output/kallisto_paired/5010_29_S29/abundance.tsv \
../output/kallisto_paired/5010_30_S30/abundance.tsv \
../output/kallisto_paired/5010_31_S31/abundance.tsv \
../output/kallisto_paired/5010_32_S32/abundance.tsv \
../output/kallisto_paired/5010_33_S33/abundance.tsv \
../output/kallisto_paired/5010_34_S34/abundance.tsv \
../output/kallisto_paired/5010_35_S35/abundance.tsv \
../output/kallisto_paired/5010_36_S36/abundance.tsv \
../output/kallisto_paired/5010_37_S37/abundance.tsv \
../output/kallisto_paired/5010_38_S38/abundance.tsv \
../output/kallisto_paired/5010_39_S39/abundance.tsv \
../output/kallisto_paired/5010_40_S40/abundance.tsv \
../output/kallisto_paired/5010_41_S41/abundance.tsv \
../output/kallisto_paired/5010_42_S42/abundance.tsv \
../output/kallisto_paired/5010_43_S43/abundance.tsv \
../output/kallisto_paired/5010_44_S44/abundance.tsv \
../output/kallisto_paired/5010_45_S45/abundance.tsv \
../output/kallisto_paired/5010_46_S46/abundance.tsv \
../output/kallisto_paired/5010_47_S47/abundance.tsv \
../output/kallisto_paired/5010_48_S48/abundance.tsv \
../output/kallisto_paired/5010_49_S49/abundance.tsv \
../output/kallisto_paired/5010_50_S50/abundance.tsv \
../output/kallisto_paired/5010_51_S51/abundance.tsv \
../output/kallisto_paired/5010_52_S52/abundance.tsv \
../output/kallisto_paired/5010_53_S53/abundance.tsv \
../output/kallisto_paired/5010_54_S54/abundance.tsv \
../output/kallisto_paired/5010_55_S55/abundance.tsv \
../output/kallisto_paired/5010_56_S56/abundance.tsv \
../output/kallisto_paired/5010_57_S57/abundance.tsv \
../output/kallisto_paired/5010_58_S58/abundance.tsv \
../output/kallisto_paired/5010_59_S59/abundance.tsv \
../output/kallisto_paired/5010_60_S60/abundance.tsv \
../output/kallisto_paired/5010_61_S61/abundance.tsv \
../output/kallisto_paired/5010_62_S62/abundance.tsv \
../output/kallisto_paired/5010_63_S63/abundance.tsv
countmatrix <- read.delim("../output/kallisto_paired.isoform.counts.matrix", header = TRUE, sep = '\t')
rownames(countmatrix) <- countmatrix$X
countmatrix <- countmatrix[,-1]
head(countmatrix)
## X5010_1_S1 X5010_2_S2 X5010_3_S3 X5010_4_S4 X5010_5_S5 X5010_6_S6
## contig_3677 826.95200 84.39420 311.85700 44.0388 73.00000 101.28200
## contig_551 10.78500 10.22240 6.04837 0.0000 8.52952 0.00000
## contig_2354 1.25878 2.45334 0.00000 0.0000 0.00000 1.17853
## contig_1623 1.27709 20.22590 0.00000 0.0000 174.84500 0.00000
## contig_3784 318.48400 276.34100 173.72400 191.0710 260.15200 179.54500
## contig_453 0.00000 0.00000 0.00000 0.0000 2.01666 1.00364
## X5010_7_S7 X5010_8_S8 X5010_9_S9 X5010_10_S10 X5010_11_S11
## contig_3677 105.12200 170.98900 358.74500 150.1950 136.84000
## contig_551 1.45547 2.74287 7.74467 13.3783 17.61910
## contig_2354 1.15811 1.30359 0.00000 0.0000 3.89947
## contig_1623 0.00000 24.25130 1.67627 5.4100 0.00000
## contig_3784 163.23900 364.76700 542.24400 478.7500 255.99000
## contig_453 0.00000 0.00000 0.00000 0.0000 0.00000
## X5010_12_S12 X5010_13_S13 X5010_14_S14 X5010_15_S15 X5010_16_S16
## contig_3677 83.05630 584.46000 137.85000 113.1400 93.61910
## contig_551 5.27151 4.26126 2.71456 12.3357 1.61551
## contig_2354 5.12464 2.43988 3.47747 0.0000 7.38579
## contig_1623 13.83930 8.43169 0.00000 0.0000 0.00000
## contig_3784 274.07300 319.06000 465.23300 595.2260 400.90900
## contig_453 0.00000 0.00000 1.03679 0.0000 1.00969
## X5010_17_S17 X5010_18_S18 X5010_19_S19 X5010_20_S20 X5010_21_S21
## contig_3677 125.35900 66.15110 82.31020 64.0000 108.76500
## contig_551 9.35177 19.44690 3.40289 2.3116 6.61237
## contig_2354 1.13798 4.58402 8.48434 0.0000 3.88187
## contig_1623 6.43131 217.67600 4.40942 24.3209 11.95220
## contig_3784 351.61900 288.11600 210.51400 224.6160 181.11400
## contig_453 0.00000 0.00000 0.00000 0.0000 0.00000
## X5010_22_S22 X5010_23_S23 X5010_24_S24 X5010_25_S25 X5010_26_S26
## contig_3677 104.25700 60.57210 92.19830 88.12340 112.90400
## contig_551 3.83241 8.09999 19.44950 22.75870 6.34299
## contig_2354 3.95112 5.51476 3.44172 2.98265 6.92777
## contig_1623 4.32650 329.15600 7.19839 1.53834 5.98053
## contig_3784 438.27900 446.79800 390.37000 402.17700 274.57000
## contig_453 0.00000 0.00000 1.01785 0.00000 1.00131
## X5010_27_S27 X5010_28_S28 X5010_29_S29 X5010_30_S30 X5010_31_S31
## contig_3677 187.58500 173.37500 58.65560 63.05000 86.70020
## contig_551 4.00957 1.34195 3.81138 4.12474 5.61875
## contig_2354 2.38835 1.17504 2.45281 1.30954 0.00000
## contig_1623 6.68866 0.00000 4.35702 3.78490 20.16760
## contig_3784 226.78200 299.39300 207.17600 303.25700 248.22300
## contig_453 0.00000 2.04331 1.02055 0.00000 0.00000
## X5010_32_S32 X5010_33_S33 X5010_34_S34 X5010_35_S35 X5010_36_S36
## contig_3677 104.05700 106.32200 56.00000 74.86760 28.01630
## contig_551 16.47710 2.92159 5.93593 3.51616 4.96861
## contig_2354 3.20801 5.26595 0.00000 6.08613 0.00000
## contig_1623 1.59525 0.00000 8.84724 7.15886 0.00000
## contig_3784 262.01100 371.65200 385.30800 303.46400 203.68800
## contig_453 0.00000 1.11682 0.00000 1.01366 0.00000
## X5010_37_S37 X5010_38_S38 X5010_39_S39 X5010_40_S40 X5010_41_S41
## contig_3677 204.29300 190.90400 77.13010 173.5560 140.91500
## contig_551 0.00000 2.94719 5.68182 11.0547 6.75250
## contig_2354 1.10422 0.00000 4.87875 0.0000 0.00000
## contig_1623 1.81430 2.45500 315.48200 17.7316 7.82375
## contig_3784 373.87600 365.95300 224.82100 220.8530 351.72500
## contig_453 0.00000 0.00000 0.00000 0.0000 1.00713
## X5010_42_S42 X5010_43_S43 X5010_44_S44 X5010_45_S45 X5010_46_S46
## contig_3677 94.05610 57.43530 138.81300 2.00000 188.75100
## contig_551 12.73010 1.35086 15.93280 0.00000 3.95477
## contig_2354 1.52899 0.00000 2.68201 0.00000 6.08133
## contig_1623 4.67600 0.00000 8.68860 11.08320 3.29040
## contig_3784 403.33500 343.48600 888.41600 4.29281 226.22800
## contig_453 3.03447 0.00000 0.00000 0.00000 0.00000
## X5010_47_S47 X5010_48_S48 X5010_49_S49 X5010_50_S50 X5010_51_S51
## contig_3677 127.25200 107.37900 101.79000 106.95600 370.35600
## contig_551 10.78060 4.05752 3.54854 8.10482 14.80880
## contig_2354 2.75001 1.27826 0.00000 4.94971 6.47401
## contig_1623 2.34736 0.00000 0.00000 0.00000 1.42552
## contig_3784 321.39000 533.80300 546.13900 472.58100 424.91900
## contig_453 0.00000 0.00000 0.00000 4.12687 0.00000
## X5010_52_S52 X5010_53_S53 X5010_54_S54 X5010_55_S55 X5010_56_S56
## contig_3677 115.70400 99.25490 143.47200 187.78900 100.06500
## contig_551 29.59790 15.40760 10.05300 1.50031 5.34908
## contig_2354 6.97468 5.57517 9.89977 4.45763 5.89888
## contig_1623 3.87394 0.00000 3.41067 251.32300 3.32341
## contig_3784 310.96100 662.58300 385.76500 331.84300 505.78300
## contig_453 0.00000 2.00339 0.00000 0.00000 1.02605
## X5010_57_S57 X5010_58_S58 X5010_59_S59 X5010_60_S60 X5010_61_S61
## contig_3677 84.15200 129.472 116.00000 57.5758 120.76200
## contig_551 3.14446 0.000 3.81271 15.8914 9.21032
## contig_2354 2.59376 0.000 0.00000 7.2040 10.94710
## contig_1623 297.32700 183.620 6.57290 13.4404 2.64150
## contig_3784 375.88700 561.064 320.28700 425.2500 394.29200
## contig_453 2.08115 0.000 0.00000 0.0000 0.00000
## X5010_62_S62 X5010_63_S63
## contig_3677 97.69350 128.18400
## contig_551 23.82790 2.46923
## contig_2354 2.34724 1.11446
## contig_1623 2.75472 0.00000
## contig_3784 354.43700 366.75000
## contig_453 0.00000 0.00000
## 'data.frame': 3294 obs. of 63 variables:
## $ X5010_1_S1 : num 827 11 1 1 318 ...
## $ X5010_2_S2 : num 84 10 2 20 276 ...
## $ X5010_3_S3 : num 312 6 0 0 174 ...
## $ X5010_4_S4 : num 44 0 0 0 191 0 27 141 878 2160 ...
## $ X5010_5_S5 : num 73 9 0 175 260 ...
## $ X5010_6_S6 : num 101 0 1 0 180 ...
## $ X5010_7_S7 : num 105 1 1 0 163 ...
## $ X5010_8_S8 : num 171 3 1 24 365 ...
## $ X5010_9_S9 : num 359 8 0 2 542 ...
## $ X5010_10_S10: num 150 13 0 5 479 ...
## $ X5010_11_S11: num 137 18 4 0 256 ...
## $ X5010_12_S12: num 83 5 5 14 274 ...
## $ X5010_13_S13: num 584 4 2 8 319 ...
## $ X5010_14_S14: num 138 3 3 0 465 ...
## $ X5010_15_S15: num 113 12 0 0 595 ...
## $ X5010_16_S16: num 94 2 7 0 401 ...
## $ X5010_17_S17: num 125 9 1 6 352 ...
## $ X5010_18_S18: num 66 19 5 218 288 ...
## $ X5010_19_S19: num 82 3 8 4 211 ...
## $ X5010_20_S20: num 64 2 0 24 225 ...
## $ X5010_21_S21: num 109 7 4 12 181 ...
## $ X5010_22_S22: num 104 4 4 4 438 ...
## $ X5010_23_S23: num 61 8 6 329 447 ...
## $ X5010_24_S24: num 92 19 3 7 390 ...
## $ X5010_25_S25: num 88 23 3 2 402 ...
## $ X5010_26_S26: num 113 6 7 6 275 ...
## $ X5010_27_S27: num 188 4 2 7 227 ...
## $ X5010_28_S28: num 173 1 1 0 299 2 36 216 997 2860 ...
## $ X5010_29_S29: num 59 4 2 4 207 ...
## $ X5010_30_S30: num 63 4 1 4 303 ...
## $ X5010_31_S31: num 87 6 0 20 248 ...
## $ X5010_32_S32: num 104 16 3 2 262 ...
## $ X5010_33_S33: num 106 3 5 0 372 ...
## $ X5010_34_S34: num 56 6 0 9 385 ...
## $ X5010_35_S35: num 75 4 6 7 303 ...
## $ X5010_36_S36: num 28 5 0 0 204 ...
## $ X5010_37_S37: num 204 0 1 2 374 ...
## $ X5010_38_S38: num 191 3 0 2 366 ...
## $ X5010_39_S39: num 77 6 5 315 225 ...
## $ X5010_40_S40: num 174 11 0 18 221 ...
## $ X5010_41_S41: num 141 7 0 8 352 ...
## $ X5010_42_S42: num 94 13 2 5 403 ...
## $ X5010_43_S43: num 57 1 0 0 343 ...
## $ X5010_44_S44: num 139 16 3 9 888 ...
## $ X5010_45_S45: num 2 0 0 11 4 0 2 4 24 75 ...
## $ X5010_46_S46: num 189 4 6 3 226 ...
## $ X5010_47_S47: num 127 11 3 2 321 ...
## $ X5010_48_S48: num 107 4 1 0 534 ...
## $ X5010_49_S49: num 102 4 0 0 546 ...
## $ X5010_50_S50: num 107 8 5 0 473 ...
## $ X5010_51_S51: num 370 15 6 1 425 ...
## $ X5010_52_S52: num 116 30 7 4 311 ...
## $ X5010_53_S53: num 99 15 6 0 663 ...
## $ X5010_54_S54: num 143 10 10 3 386 ...
## $ X5010_55_S55: num 188 2 4 251 332 0 32 360 1040 3780 ...
## $ X5010_56_S56: num 100 5 6 3 506 ...
## $ X5010_57_S57: num 84 3 3 297 376 ...
## $ X5010_58_S58: num 129 0 0 184 561 ...
## $ X5010_59_S59: num 116 4 0 7 320 ...
## $ X5010_60_S60: num 58 16 7 13 425 ...
## $ X5010_61_S61: num 121 9 11 3 394 ...
## $ X5010_62_S62: num 98 24 2 3 354 ...
## $ X5010_63_S63: num 128 2 1 0 367 ...
## [1] 3294 63
## [1] 63 2
## condition type
## 1 control paired-end
## 2 control paired-end
## 3 control paired-end
## 4 control paired-end
## 5 control paired-end
## 6 control paired-end
## 7 control paired-end
## 8 control paired-end
## 9 control paired-end
## 10 control paired-end
## 11 control paired-end
## 12 control paired-end
## 13 pH 7.5-long exposure paired-end
## 14 pH 7.5-long exposure paired-end
## 15 pH 7.5-long exposure paired-end
## 16 pH 7.5-long exposure paired-end
## 17 pH 7.5-long exposure paired-end
## 18 pH 7.5-long exposure paired-end
## 19 pH 7.5-long exposure paired-end
## 20 pH 7.5-long exposure paired-end
## 21 pH 7.5-long exposure paired-end
## 22 pH 7.5-long exposure paired-end
## 23 pH 7.5-long exposure paired-end
## 24 pH 7.5-long exposure paired-end
## 25 pH 7.5-long exposure paired-end
## 26 pH 7.5-long exposure paired-end
## 27 pH 7.5-short exposure paired-end
## 28 pH 7.5-short exposure paired-end
## 29 pH 7.5-short exposure paired-end
## 30 pH 7.5-short exposure paired-end
## 31 pH 7.5-short exposure paired-end
## 32 pH 7.5-short exposure paired-end
## 33 pH 7.5-short exposure paired-end
## 34 pH 7.5-short exposure paired-end
## 35 pH 7.5-short exposure paired-end
## 36 pH 7.5-short exposure paired-end
## 37 pH 7.5-short exposure paired-end
## 38 pH 7.5-short exposure paired-end
## 39 pH 7.8-long exposure paired-end
## 40 pH 7.8-long exposure paired-end
## 41 pH 7.8-long exposure paired-end
## 42 pH 7.8-long exposure paired-end
## 43 pH 7.8-long exposure paired-end
## 44 pH 7.8-long exposure paired-end
## 45 pH 7.8-long exposure paired-end
## 46 pH 7.8-long exposure paired-end
## 47 pH 7.8-long exposure paired-end
## 48 pH 7.8-long exposure paired-end
## 49 pH 7.8-long exposure paired-end
## 50 pH 7.8-long exposure paired-end
## 51 pH 7.8-long exposure paired-end
## 52 pH 7.8-long exposure paired-end
## 53 pH 7.8-short exposure paired-end
## 54 pH 7.8-short exposure paired-end
## 55 pH 7.8-short exposure paired-end
## 56 pH 7.8-short exposure paired-end
## 57 pH 7.8-short exposure paired-end
## 58 pH 7.8-short exposure paired-end
## 59 pH 7.8-short exposure paired-end
## 60 pH 7.8-short exposure paired-end
## 61 pH 7.8-short exposure paired-end
## 62 pH 7.8-short exposure paired-end
## 63 pH 7.8-short exposure paired-end
## class: DESeqDataSet
## dim: 3294 63
## metadata(1): version
## assays(1): counts
## rownames(3294): contig_3677 contig_551 ... contig_4738 contig_3758
## rowData names(0):
## colnames(63): X5010_1_S1 X5010_2_S2 ... X5010_62_S62 X5010_63_S63
## colData names(2): condition type
head(deseq2.res)
## log2 fold change (MLE): condition pH.7.8.short.exposure vs control
## Wald test p-value: condition pH.7.8.short.exposure vs control
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## contig_100 37.11779 0.378808 0.398122 0.951487 0.341357 0.768479
## contig_1000 41.73616 0.271854 0.181810 1.495270 0.134844 0.583872
## contig_1003 54.68653 0.184489 0.409173 0.450882 0.652075 0.912112
## contig_1005 7.97371 0.165513 0.371475 0.445555 0.655919 0.913866
## contig_1006 72.35878 -0.308162 0.562138 -0.548196 0.583557 0.888517
## contig_1007 674.88257 -0.160801 0.182866 -0.879335 0.379220 0.797610
# Count number of hits with adjusted p-value less then 0.05
dim(deseq2.res[!is.na(deseq2.res$padj) & deseq2.res$padj <= 0.05, ])
## [1] 25 6
tmp <- deseq2.res
# The main plot
plot(tmp$baseMean, tmp$log2FoldChange, pch=20, cex=0.45, ylim=c(-3, 3), log="x", col="darkgray",
main="DEG Snow Crab Treatments (pval <= 0.05)",
xlab="mean of normalized counts",
ylab="Log2 Fold Change")
# Getting the significant points and plotting them again so they're a different color
tmp.sig <- deseq2.res[!is.na(deseq2.res$padj) & deseq2.res$padj <= 0.05, ]
points(tmp.sig$baseMean, tmp.sig$log2FoldChange, pch=20, cex=0.45, col="red")
# 2 FC lines
abline(h=c(-1,1), col="blue")
write.table(tmp.sig, "../output/DEGlist.tab", row.names = T)