1 Introduction

This project contains a workflow for examining the effect of ocean acidification on gene expression of snow crabs. It runs using paired end RNA-Seq data prepared and provided by Laura Spencer. 63 juvenile crabs were exposed to the following pH treatments:

  • control (ambient)
  • pH 7.8 (8 hours)
  • pH 7.8 (12 weeks)
  • pH 7.5 (8 hours)
  • pH 7.5 (12 weeks)

On 7/20/2021, after exposure, all crabs were sacrificed by puncturing the carapace through the cardiac region. Crabs were preserved overnight at 4C prior to being transferred to -80C. mRNA samples were then extracted. Each of the 63 mRNA samples were run in both lanes of NovaSeq 6000, for a grand total of 126 paired-end RNA-Seq data sets. Sequence data received January 3, 2022.

End goal: heatmap of gene expression.

1.2 Data

The data is located here, uploaded by LS on 01/12/2022.

Detailed metadata is located here.

2 Code

2.1 Look at all RNASeq files

#there are 63 files within /5010 
ls /home/shared/8TB_HDD_01/snow_crab/5010

2.2 Download C. bairdi reference, save it to “data” directory

cd ../data
curl -O https://owl.fish.washington.edu/halfshell/genomic-databank/cbai_genome_v1.0.fasta

2.3 Set index

#index "cbai_genome_v1.0.fasta" and rename it as "cbai_genome_v1.0.index"
/home/shared/kallisto/kallisto \
index -i \
../data/cbai_genome_v1.0.index \  
../data/cbai_genome_v1.0.fasta

2.4 Look at files within folder 5010

#list file names
ls /home/shared/8TB_HDD_01/snow_crab/5010/*fastq.gz

2.5 Create abundance estimates

#make a new directory in output
mkdir ../output/kallisto_paired

#find specific files and create abundance estimates
find /home/shared/8TB_HDD_01/snow_crab/5010/*fastq.gz \
| xargs basename -s _L003_R1_001.fastq.gz | xargs -I{} \
/home/shared/kallisto/kallisto \
quant -i ../data/cbai_genome_v1.0.index \
-o ../output/kallisto_paired/{} \
-t 40 \
/home/shared/8TB_HDD_01/snow_crab/5010/{}_L003_R1_001.fastq.gz \
/home/shared/8TB_HDD_01/snow_crab/5010/{}_L003_R2_001.fastq.gz

# delete empty files
rm ../output/kallisto_paired/*R2_001.fastq.gz

2.6 Create a gene expression matrix from kallisto output files

Run abundance_estimates_to_matrix.pl script from the Trinity RNA-seq assembly software package.

ls ~/courtney_RNAseq_crabs/output/kallisto_paired
#use these folder names for perl input
#Sample name input for perl
perl /home/shared/trinityrnaseq-v2.12.0/util/abundance_estimates_to_matrix.pl \
--est_method kallisto \
    --gene_trans_map none \
    --out_prefix ../output/kallisto_paired \
    --name_sample_by_basedir \
    ../output/kallisto_paired/5010_1_S1/abundance.tsv \
    ../output/kallisto_paired/5010_2_S2/abundance.tsv \
    ../output/kallisto_paired/5010_3_S3/abundance.tsv \
    ../output/kallisto_paired/5010_4_S4/abundance.tsv \
    ../output/kallisto_paired/5010_5_S5/abundance.tsv \
    ../output/kallisto_paired/5010_6_S6/abundance.tsv \
    ../output/kallisto_paired/5010_7_S7/abundance.tsv \
    ../output/kallisto_paired/5010_8_S8/abundance.tsv \
    ../output/kallisto_paired/5010_9_S9/abundance.tsv \
    ../output/kallisto_paired/5010_10_S10/abundance.tsv \
    ../output/kallisto_paired/5010_11_S11/abundance.tsv \
    ../output/kallisto_paired/5010_12_S12/abundance.tsv \
    ../output/kallisto_paired/5010_13_S13/abundance.tsv \
    ../output/kallisto_paired/5010_14_S14/abundance.tsv \
    ../output/kallisto_paired/5010_15_S15/abundance.tsv \
    ../output/kallisto_paired/5010_16_S16/abundance.tsv \
    ../output/kallisto_paired/5010_17_S17/abundance.tsv \
    ../output/kallisto_paired/5010_18_S18/abundance.tsv \
    ../output/kallisto_paired/5010_19_S19/abundance.tsv \
    ../output/kallisto_paired/5010_20_S20/abundance.tsv \
    ../output/kallisto_paired/5010_21_S21/abundance.tsv \
    ../output/kallisto_paired/5010_22_S22/abundance.tsv \
    ../output/kallisto_paired/5010_23_S23/abundance.tsv \
    ../output/kallisto_paired/5010_24_S24/abundance.tsv \
    ../output/kallisto_paired/5010_25_S25/abundance.tsv \
    ../output/kallisto_paired/5010_26_S26/abundance.tsv \
    ../output/kallisto_paired/5010_27_S27/abundance.tsv \
    ../output/kallisto_paired/5010_28_S28/abundance.tsv \
    ../output/kallisto_paired/5010_29_S29/abundance.tsv \
    ../output/kallisto_paired/5010_30_S30/abundance.tsv \
    ../output/kallisto_paired/5010_31_S31/abundance.tsv \
    ../output/kallisto_paired/5010_32_S32/abundance.tsv \
    ../output/kallisto_paired/5010_33_S33/abundance.tsv \
    ../output/kallisto_paired/5010_34_S34/abundance.tsv \
    ../output/kallisto_paired/5010_35_S35/abundance.tsv \
    ../output/kallisto_paired/5010_36_S36/abundance.tsv \
    ../output/kallisto_paired/5010_37_S37/abundance.tsv \
    ../output/kallisto_paired/5010_38_S38/abundance.tsv \
    ../output/kallisto_paired/5010_39_S39/abundance.tsv \
    ../output/kallisto_paired/5010_40_S40/abundance.tsv \
    ../output/kallisto_paired/5010_41_S41/abundance.tsv \
    ../output/kallisto_paired/5010_42_S42/abundance.tsv \
    ../output/kallisto_paired/5010_43_S43/abundance.tsv \
    ../output/kallisto_paired/5010_44_S44/abundance.tsv \
    ../output/kallisto_paired/5010_45_S45/abundance.tsv \
    ../output/kallisto_paired/5010_46_S46/abundance.tsv \
    ../output/kallisto_paired/5010_47_S47/abundance.tsv \
    ../output/kallisto_paired/5010_48_S48/abundance.tsv \
    ../output/kallisto_paired/5010_49_S49/abundance.tsv \
    ../output/kallisto_paired/5010_50_S50/abundance.tsv \
    ../output/kallisto_paired/5010_51_S51/abundance.tsv \
    ../output/kallisto_paired/5010_52_S52/abundance.tsv \
    ../output/kallisto_paired/5010_53_S53/abundance.tsv \
    ../output/kallisto_paired/5010_54_S54/abundance.tsv \
    ../output/kallisto_paired/5010_55_S55/abundance.tsv \
    ../output/kallisto_paired/5010_56_S56/abundance.tsv \
    ../output/kallisto_paired/5010_57_S57/abundance.tsv \
    ../output/kallisto_paired/5010_58_S58/abundance.tsv \
    ../output/kallisto_paired/5010_59_S59/abundance.tsv \
    ../output/kallisto_paired/5010_60_S60/abundance.tsv \
    ../output/kallisto_paired/5010_61_S61/abundance.tsv \
    ../output/kallisto_paired/5010_62_S62/abundance.tsv \
    ../output/kallisto_paired/5010_63_S63/abundance.tsv

2.7 Get packages

2.8 Read in count matrix

countmatrix <- read.delim("../output/kallisto_paired.isoform.counts.matrix", header = TRUE, sep = '\t')
rownames(countmatrix) <- countmatrix$X
countmatrix <- countmatrix[,-1]
head(countmatrix)
##             X5010_1_S1 X5010_2_S2 X5010_3_S3 X5010_4_S4 X5010_5_S5 X5010_6_S6
## contig_3677  826.95200   84.39420  311.85700    44.0388   73.00000  101.28200
## contig_551    10.78500   10.22240    6.04837     0.0000    8.52952    0.00000
## contig_2354    1.25878    2.45334    0.00000     0.0000    0.00000    1.17853
## contig_1623    1.27709   20.22590    0.00000     0.0000  174.84500    0.00000
## contig_3784  318.48400  276.34100  173.72400   191.0710  260.15200  179.54500
## contig_453     0.00000    0.00000    0.00000     0.0000    2.01666    1.00364
##             X5010_7_S7 X5010_8_S8 X5010_9_S9 X5010_10_S10 X5010_11_S11
## contig_3677  105.12200  170.98900  358.74500     150.1950    136.84000
## contig_551     1.45547    2.74287    7.74467      13.3783     17.61910
## contig_2354    1.15811    1.30359    0.00000       0.0000      3.89947
## contig_1623    0.00000   24.25130    1.67627       5.4100      0.00000
## contig_3784  163.23900  364.76700  542.24400     478.7500    255.99000
## contig_453     0.00000    0.00000    0.00000       0.0000      0.00000
##             X5010_12_S12 X5010_13_S13 X5010_14_S14 X5010_15_S15 X5010_16_S16
## contig_3677     83.05630    584.46000    137.85000     113.1400     93.61910
## contig_551       5.27151      4.26126      2.71456      12.3357      1.61551
## contig_2354      5.12464      2.43988      3.47747       0.0000      7.38579
## contig_1623     13.83930      8.43169      0.00000       0.0000      0.00000
## contig_3784    274.07300    319.06000    465.23300     595.2260    400.90900
## contig_453       0.00000      0.00000      1.03679       0.0000      1.00969
##             X5010_17_S17 X5010_18_S18 X5010_19_S19 X5010_20_S20 X5010_21_S21
## contig_3677    125.35900     66.15110     82.31020      64.0000    108.76500
## contig_551       9.35177     19.44690      3.40289       2.3116      6.61237
## contig_2354      1.13798      4.58402      8.48434       0.0000      3.88187
## contig_1623      6.43131    217.67600      4.40942      24.3209     11.95220
## contig_3784    351.61900    288.11600    210.51400     224.6160    181.11400
## contig_453       0.00000      0.00000      0.00000       0.0000      0.00000
##             X5010_22_S22 X5010_23_S23 X5010_24_S24 X5010_25_S25 X5010_26_S26
## contig_3677    104.25700     60.57210     92.19830     88.12340    112.90400
## contig_551       3.83241      8.09999     19.44950     22.75870      6.34299
## contig_2354      3.95112      5.51476      3.44172      2.98265      6.92777
## contig_1623      4.32650    329.15600      7.19839      1.53834      5.98053
## contig_3784    438.27900    446.79800    390.37000    402.17700    274.57000
## contig_453       0.00000      0.00000      1.01785      0.00000      1.00131
##             X5010_27_S27 X5010_28_S28 X5010_29_S29 X5010_30_S30 X5010_31_S31
## contig_3677    187.58500    173.37500     58.65560     63.05000     86.70020
## contig_551       4.00957      1.34195      3.81138      4.12474      5.61875
## contig_2354      2.38835      1.17504      2.45281      1.30954      0.00000
## contig_1623      6.68866      0.00000      4.35702      3.78490     20.16760
## contig_3784    226.78200    299.39300    207.17600    303.25700    248.22300
## contig_453       0.00000      2.04331      1.02055      0.00000      0.00000
##             X5010_32_S32 X5010_33_S33 X5010_34_S34 X5010_35_S35 X5010_36_S36
## contig_3677    104.05700    106.32200     56.00000     74.86760     28.01630
## contig_551      16.47710      2.92159      5.93593      3.51616      4.96861
## contig_2354      3.20801      5.26595      0.00000      6.08613      0.00000
## contig_1623      1.59525      0.00000      8.84724      7.15886      0.00000
## contig_3784    262.01100    371.65200    385.30800    303.46400    203.68800
## contig_453       0.00000      1.11682      0.00000      1.01366      0.00000
##             X5010_37_S37 X5010_38_S38 X5010_39_S39 X5010_40_S40 X5010_41_S41
## contig_3677    204.29300    190.90400     77.13010     173.5560    140.91500
## contig_551       0.00000      2.94719      5.68182      11.0547      6.75250
## contig_2354      1.10422      0.00000      4.87875       0.0000      0.00000
## contig_1623      1.81430      2.45500    315.48200      17.7316      7.82375
## contig_3784    373.87600    365.95300    224.82100     220.8530    351.72500
## contig_453       0.00000      0.00000      0.00000       0.0000      1.00713
##             X5010_42_S42 X5010_43_S43 X5010_44_S44 X5010_45_S45 X5010_46_S46
## contig_3677     94.05610     57.43530    138.81300      2.00000    188.75100
## contig_551      12.73010      1.35086     15.93280      0.00000      3.95477
## contig_2354      1.52899      0.00000      2.68201      0.00000      6.08133
## contig_1623      4.67600      0.00000      8.68860     11.08320      3.29040
## contig_3784    403.33500    343.48600    888.41600      4.29281    226.22800
## contig_453       3.03447      0.00000      0.00000      0.00000      0.00000
##             X5010_47_S47 X5010_48_S48 X5010_49_S49 X5010_50_S50 X5010_51_S51
## contig_3677    127.25200    107.37900    101.79000    106.95600    370.35600
## contig_551      10.78060      4.05752      3.54854      8.10482     14.80880
## contig_2354      2.75001      1.27826      0.00000      4.94971      6.47401
## contig_1623      2.34736      0.00000      0.00000      0.00000      1.42552
## contig_3784    321.39000    533.80300    546.13900    472.58100    424.91900
## contig_453       0.00000      0.00000      0.00000      4.12687      0.00000
##             X5010_52_S52 X5010_53_S53 X5010_54_S54 X5010_55_S55 X5010_56_S56
## contig_3677    115.70400     99.25490    143.47200    187.78900    100.06500
## contig_551      29.59790     15.40760     10.05300      1.50031      5.34908
## contig_2354      6.97468      5.57517      9.89977      4.45763      5.89888
## contig_1623      3.87394      0.00000      3.41067    251.32300      3.32341
## contig_3784    310.96100    662.58300    385.76500    331.84300    505.78300
## contig_453       0.00000      2.00339      0.00000      0.00000      1.02605
##             X5010_57_S57 X5010_58_S58 X5010_59_S59 X5010_60_S60 X5010_61_S61
## contig_3677     84.15200      129.472    116.00000      57.5758    120.76200
## contig_551       3.14446        0.000      3.81271      15.8914      9.21032
## contig_2354      2.59376        0.000      0.00000       7.2040     10.94710
## contig_1623    297.32700      183.620      6.57290      13.4404      2.64150
## contig_3784    375.88700      561.064    320.28700     425.2500    394.29200
## contig_453       2.08115        0.000      0.00000       0.0000      0.00000
##             X5010_62_S62 X5010_63_S63
## contig_3677     97.69350    128.18400
## contig_551      23.82790      2.46923
## contig_2354      2.34724      1.11446
## contig_1623      2.75472      0.00000
## contig_3784    354.43700    366.75000
## contig_453       0.00000      0.00000

2.8.1 Round up to whole numbers

## 'data.frame':    3294 obs. of  63 variables:
##  $ X5010_1_S1  : num  827 11 1 1 318 ...
##  $ X5010_2_S2  : num  84 10 2 20 276 ...
##  $ X5010_3_S3  : num  312 6 0 0 174 ...
##  $ X5010_4_S4  : num  44 0 0 0 191 0 27 141 878 2160 ...
##  $ X5010_5_S5  : num  73 9 0 175 260 ...
##  $ X5010_6_S6  : num  101 0 1 0 180 ...
##  $ X5010_7_S7  : num  105 1 1 0 163 ...
##  $ X5010_8_S8  : num  171 3 1 24 365 ...
##  $ X5010_9_S9  : num  359 8 0 2 542 ...
##  $ X5010_10_S10: num  150 13 0 5 479 ...
##  $ X5010_11_S11: num  137 18 4 0 256 ...
##  $ X5010_12_S12: num  83 5 5 14 274 ...
##  $ X5010_13_S13: num  584 4 2 8 319 ...
##  $ X5010_14_S14: num  138 3 3 0 465 ...
##  $ X5010_15_S15: num  113 12 0 0 595 ...
##  $ X5010_16_S16: num  94 2 7 0 401 ...
##  $ X5010_17_S17: num  125 9 1 6 352 ...
##  $ X5010_18_S18: num  66 19 5 218 288 ...
##  $ X5010_19_S19: num  82 3 8 4 211 ...
##  $ X5010_20_S20: num  64 2 0 24 225 ...
##  $ X5010_21_S21: num  109 7 4 12 181 ...
##  $ X5010_22_S22: num  104 4 4 4 438 ...
##  $ X5010_23_S23: num  61 8 6 329 447 ...
##  $ X5010_24_S24: num  92 19 3 7 390 ...
##  $ X5010_25_S25: num  88 23 3 2 402 ...
##  $ X5010_26_S26: num  113 6 7 6 275 ...
##  $ X5010_27_S27: num  188 4 2 7 227 ...
##  $ X5010_28_S28: num  173 1 1 0 299 2 36 216 997 2860 ...
##  $ X5010_29_S29: num  59 4 2 4 207 ...
##  $ X5010_30_S30: num  63 4 1 4 303 ...
##  $ X5010_31_S31: num  87 6 0 20 248 ...
##  $ X5010_32_S32: num  104 16 3 2 262 ...
##  $ X5010_33_S33: num  106 3 5 0 372 ...
##  $ X5010_34_S34: num  56 6 0 9 385 ...
##  $ X5010_35_S35: num  75 4 6 7 303 ...
##  $ X5010_36_S36: num  28 5 0 0 204 ...
##  $ X5010_37_S37: num  204 0 1 2 374 ...
##  $ X5010_38_S38: num  191 3 0 2 366 ...
##  $ X5010_39_S39: num  77 6 5 315 225 ...
##  $ X5010_40_S40: num  174 11 0 18 221 ...
##  $ X5010_41_S41: num  141 7 0 8 352 ...
##  $ X5010_42_S42: num  94 13 2 5 403 ...
##  $ X5010_43_S43: num  57 1 0 0 343 ...
##  $ X5010_44_S44: num  139 16 3 9 888 ...
##  $ X5010_45_S45: num  2 0 0 11 4 0 2 4 24 75 ...
##  $ X5010_46_S46: num  189 4 6 3 226 ...
##  $ X5010_47_S47: num  127 11 3 2 321 ...
##  $ X5010_48_S48: num  107 4 1 0 534 ...
##  $ X5010_49_S49: num  102 4 0 0 546 ...
##  $ X5010_50_S50: num  107 8 5 0 473 ...
##  $ X5010_51_S51: num  370 15 6 1 425 ...
##  $ X5010_52_S52: num  116 30 7 4 311 ...
##  $ X5010_53_S53: num  99 15 6 0 663 ...
##  $ X5010_54_S54: num  143 10 10 3 386 ...
##  $ X5010_55_S55: num  188 2 4 251 332 0 32 360 1040 3780 ...
##  $ X5010_56_S56: num  100 5 6 3 506 ...
##  $ X5010_57_S57: num  84 3 3 297 376 ...
##  $ X5010_58_S58: num  129 0 0 184 561 ...
##  $ X5010_59_S59: num  116 4 0 7 320 ...
##  $ X5010_60_S60: num  58 16 7 13 425 ...
##  $ X5010_61_S61: num  121 9 11 3 394 ...
##  $ X5010_62_S62: num  98 24 2 3 354 ...
##  $ X5010_63_S63: num  128 2 1 0 367 ...

2.9 Get differential gene expression based on pH exposure

## [1] 3294   63
## [1] 63  2
##                condition       type
## 1                control paired-end
## 2                control paired-end
## 3                control paired-end
## 4                control paired-end
## 5                control paired-end
## 6                control paired-end
## 7                control paired-end
## 8                control paired-end
## 9                control paired-end
## 10               control paired-end
## 11               control paired-end
## 12               control paired-end
## 13  pH 7.5-long exposure paired-end
## 14  pH 7.5-long exposure paired-end
## 15  pH 7.5-long exposure paired-end
## 16  pH 7.5-long exposure paired-end
## 17  pH 7.5-long exposure paired-end
## 18  pH 7.5-long exposure paired-end
## 19  pH 7.5-long exposure paired-end
## 20  pH 7.5-long exposure paired-end
## 21  pH 7.5-long exposure paired-end
## 22  pH 7.5-long exposure paired-end
## 23  pH 7.5-long exposure paired-end
## 24  pH 7.5-long exposure paired-end
## 25  pH 7.5-long exposure paired-end
## 26  pH 7.5-long exposure paired-end
## 27 pH 7.5-short exposure paired-end
## 28 pH 7.5-short exposure paired-end
## 29 pH 7.5-short exposure paired-end
## 30 pH 7.5-short exposure paired-end
## 31 pH 7.5-short exposure paired-end
## 32 pH 7.5-short exposure paired-end
## 33 pH 7.5-short exposure paired-end
## 34 pH 7.5-short exposure paired-end
## 35 pH 7.5-short exposure paired-end
## 36 pH 7.5-short exposure paired-end
## 37 pH 7.5-short exposure paired-end
## 38 pH 7.5-short exposure paired-end
## 39  pH 7.8-long exposure paired-end
## 40  pH 7.8-long exposure paired-end
## 41  pH 7.8-long exposure paired-end
## 42  pH 7.8-long exposure paired-end
## 43  pH 7.8-long exposure paired-end
## 44  pH 7.8-long exposure paired-end
## 45  pH 7.8-long exposure paired-end
## 46  pH 7.8-long exposure paired-end
## 47  pH 7.8-long exposure paired-end
## 48  pH 7.8-long exposure paired-end
## 49  pH 7.8-long exposure paired-end
## 50  pH 7.8-long exposure paired-end
## 51  pH 7.8-long exposure paired-end
## 52  pH 7.8-long exposure paired-end
## 53 pH 7.8-short exposure paired-end
## 54 pH 7.8-short exposure paired-end
## 55 pH 7.8-short exposure paired-end
## 56 pH 7.8-short exposure paired-end
## 57 pH 7.8-short exposure paired-end
## 58 pH 7.8-short exposure paired-end
## 59 pH 7.8-short exposure paired-end
## 60 pH 7.8-short exposure paired-end
## 61 pH 7.8-short exposure paired-end
## 62 pH 7.8-short exposure paired-end
## 63 pH 7.8-short exposure paired-end
## class: DESeqDataSet 
## dim: 3294 63 
## metadata(1): version
## assays(1): counts
## rownames(3294): contig_3677 contig_551 ... contig_4738 contig_3758
## rowData names(0):
## colnames(63): X5010_1_S1 X5010_2_S2 ... X5010_62_S62 X5010_63_S63
## colData names(2): condition type

2.10 Look at DESeq matrix

head(deseq2.res)
## log2 fold change (MLE): condition pH.7.8.short.exposure vs control 
## Wald test p-value: condition pH.7.8.short.exposure vs control 
## DataFrame with 6 rows and 6 columns
##              baseMean log2FoldChange     lfcSE      stat    pvalue      padj
##             <numeric>      <numeric> <numeric> <numeric> <numeric> <numeric>
## contig_100   37.11779       0.378808  0.398122  0.951487  0.341357  0.768479
## contig_1000  41.73616       0.271854  0.181810  1.495270  0.134844  0.583872
## contig_1003  54.68653       0.184489  0.409173  0.450882  0.652075  0.912112
## contig_1005   7.97371       0.165513  0.371475  0.445555  0.655919  0.913866
## contig_1006  72.35878      -0.308162  0.562138 -0.548196  0.583557  0.888517
## contig_1007 674.88257      -0.160801  0.182866 -0.879335  0.379220  0.797610

2.11 Look for signficant values

# Count number of hits with adjusted p-value less then 0.05
dim(deseq2.res[!is.na(deseq2.res$padj) & deseq2.res$padj <= 0.05, ])
## [1] 25  6
tmp <- deseq2.res
# The main plot
plot(tmp$baseMean, tmp$log2FoldChange, pch=20, cex=0.45, ylim=c(-3, 3), log="x", col="darkgray",
     main="DEG Snow Crab Treatments  (pval <= 0.05)",
     xlab="mean of normalized counts",
     ylab="Log2 Fold Change")
# Getting the significant points and plotting them again so they're a different color
tmp.sig <- deseq2.res[!is.na(deseq2.res$padj) & deseq2.res$padj <= 0.05, ]
points(tmp.sig$baseMean, tmp.sig$log2FoldChange, pch=20, cex=0.45, col="red")
# 2 FC lines
abline(h=c(-1,1), col="blue")

write.table(tmp.sig, "../output/DEGlist.tab", row.names = T)