Download data - I already have this downloaded.

# Download the deduplicated, sorted BAM file and its .bai index into ../data.
# The commands are chained with && so nothing is downloaded into the wrong
# directory if ../data is missing, and the index is not fetched if the BAM
# download failed.
# --fail:     return an error on HTTP 4xx/5xx instead of saving the server's
#             error page under the .bam/.bai file name.
# --location: follow redirects to the real file.
cd ../data &&
  curl --fail --location -O https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/19F_R1_val_1_bismark_bt2_pe.deduplicated.sorted.bam &&
  curl --fail --location -O https://gannet.fish.washington.edu/seashell/bu-mox/scrubbed/120321-cvBS/19F_R1_val_1_bismark_bt2_pe.deduplicated.sorted.bam.bai

download bed file

# Download the C_virginica-3.0 Gnomon gene annotation (BED) into ../data.
# Chained with && so the download is skipped if ../data cannot be entered.
# --fail makes curl report HTTP errors instead of saving an error page as the
# .bed file; --location follows redirects.
cd ../data &&
  curl --fail --location -O https://eagle.fish.washington.edu/Cvirg_tracks/C_virginica-3.0_Gnomon_genes.bed

Convert the BAM file to a BED file

# Convert the BAM alignments to BED intervals with `bedtools bamtobed`.
# bamtobed writes to stdout, so the result is redirected into the output file.
/home/shared/bedtools2/bin/bedtools bamtobed \
  -i /home/shared/8TB_HDD_02/schulh2/GitHub/haila-coursework/assignments/data/19F_R1_val_1_bismark_bt2_pe.deduplicated.sorted.bam \
  > /home/shared/8TB_HDD_02/schulh2/GitHub/haila-coursework/assignments/output/08-19F.bed

Default Behavior

# Compute per-gene read coverage with `bedtools coverage` (default options).
# For every interval in -a (genes) bedtools appends four columns: the number
# of -b features (reads) overlapping it, the number of bases covered, the
# interval length, and the fraction of the interval covered.
genes_bed=../data/C_virginica-3.0_Gnomon_genes.bed
reads_bed=../output/08-19F.bed
coverage_out=../output/08-gene-19F-coverage.out

/home/shared/bedtools2/bin/bedtools coverage -a "$genes_bed" -b "$reads_bed" > "$coverage_out"

inspect file

# Show the first 10 lines (head's default count) of the coverage table.
head -n 10 ../output/08-gene-19F-coverage.out
## NC_035780.1  13578   14594   gene-LOC111116054   0   +   65  1008    1016    0.9921260
## NC_035780.1  28961   33324   gene-LOC111126949   0   +   2679    4363    4363    1.0000000
## NC_035780.1  43111   66897   gene-LOC111110729   0   -   11794   23132   23786   0.9725049
## NC_035780.1  85606   95254   gene-LOC111112434   0   -   4618    9648    9648    1.0000000
## NC_035780.1  99840   106460  gene-LOC111120752   0   +   4023    6620    6620    1.0000000
## NC_035780.1  108305  110077  gene-LOC111128944   0   -   1527    1772    1772    1.0000000
## NC_035780.1  151859  157536  gene-LOC111128953   0   +   3334    5677    5677    1.0000000
## NC_035780.1  163809  183798  gene-LOC111105691   0   -   6683    19625   19989   0.9817900
## NC_035780.1  164820  166793  gene-LOC111105685   0   +   684 1973    1973    1.0000000
## NC_035780.1  169468  170178  gene-LOC111105702   0   -   138 710 710 1.0000000

Intersect

Download the gene (GFF), transposable element (BED), and lncRNA (GFF) annotation files

# Download the cgigas_uk_roslin_v1 annotation tracks into ../data:
# genes (GFF), RepeatMasker transposable elements (BED), and lncRNAs (GFF).
# Chained with && so nothing is fetched into the wrong directory and the run
# stops at the first failed download.
# --fail makes curl report HTTP errors instead of saving an error page under
# the annotation file name; --location follows redirects.
# NOTE(review): these URLs use plain http — switch to https if the server
# supports it; confirm before changing.
cd ../data &&
  curl --fail --location -O http://owl.fish.washington.edu/halfshell/genomic-databank/cgigas_uk_roslin_v1_gene.gff &&
  curl --fail --location -O http://owl.fish.washington.edu/halfshell/genomic-databank/cgigas_uk_roslin_v1_rm.te.bed &&
  curl --fail --location -O http://owl.fish.washington.edu/halfshell/genomic-databank/cgigas_uk_roslin_v1_lncRNA.gff

Find where the files intersect

# Intersect gene features (-a) with transposable elements (-b).
# With no -wa/-wb flags, bedtools intersect writes the -a record clipped to
# the overlapping region, once per overlap, so the coordinates in the output
# are the overlap intervals rather than the full gene extents.
/home/shared/bedtools2/bin/bedtools intersect \
  -a ../data/cgigas_uk_roslin_v1_gene.gff \
  -b ../data/cgigas_uk_roslin_v1_rm.te.bed \
  > ../output/08-gene-TE-intersect.out

# Show the first two records of the gene/TE intersection.
head -n 2 ../output/08-gene-TE-intersect.out
## NC_047559.1  Gnomon  gene    15715   15759   .   +   .   ID=gene-LOC109621113;Dbxref=GeneID:109621113;Name=LOC109621113;gbkey=Gene;gene=LOC109621113;gene_biotype=protein_coding
## NC_047559.1  Gnomon  gene    19138   19160   .   -   .   ID=gene-LOC117687066;Dbxref=GeneID:117687066;Name=LOC117687066;gbkey=Gene;gene=LOC117687066;gene_biotype=protein_coding

Closest

# For each lncRNA feature (-a), report the closest gene feature (-b).
# Overlapping features count as closest, and by default every tie is
# reported, so one lncRNA can appear on several output lines.
# NOTE(review): bedtools closest expects both inputs sorted by chromosome and
# start position — presumably these GFFs already are; confirm.
# NOTE: the output name is spelled "closet" (sic); it is kept unchanged
# because the inspection step that follows reads this exact path.
lnc_gff=../data/cgigas_uk_roslin_v1_lncRNA.gff
gene_gff=../data/cgigas_uk_roslin_v1_gene.gff
closest_out=../output/08-lnc-gene-closet.out

/home/shared/bedtools2/bin/bedtools closest -a "$lnc_gff" -b "$gene_gff" > "$closest_out"

take a look at file

# Show the first 10 lines (head's default count) of the lncRNA/closest-gene table.
head -n 10 ../output/08-lnc-gene-closet.out
## NC_047559.1  Gnomon  lnc_RNA 9839    11386   .   +   .   ID=rna-XR_004604272.1;Parent=gene-LOC117693020;Dbxref=GeneID:117693020,Genbank:XR_004604272.1;Name=XR_004604272.1;gbkey=ncRNA;gene=LOC117693020;model_evidence=Supporting evidence includes similarity to: 1 EST%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 15 samples with support for all annotated introns;product=uncharacterized LOC117693020;transcript_id=XR_004604272.1  NC_047559.1 Gnomon  gene    9839    11386   .   +   .   ID=gene-LOC117693020;Dbxref=GeneID:117693020;Name=LOC117693020;gbkey=Gene;gene=LOC117693020;gene_biotype=lncRNA
## NC_047559.1  Gnomon  lnc_RNA 167270  168430  .   -   .   ID=rna-XR_004601744.1;Parent=gene-LOC117689460;Dbxref=GeneID:117689460,Genbank:XR_004601744.1;Name=XR_004601744.1;gbkey=ncRNA;gene=LOC117689460;model_evidence=Supporting evidence includes similarity to: 3 long SRA reads%2C and 98%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 52 samples with support for all annotated introns;product=uncharacterized LOC117689460;transcript_id=XR_004601744.1    NC_047559.1 Gnomon  gene    151758  185673  .   +   .   ID=gene-LOC117687070;Dbxref=GeneID:117687070;Name=LOC117687070;gbkey=Gene;gene=LOC117687070;gene_biotype=protein_coding
## NC_047559.1  Gnomon  lnc_RNA 167270  168430  .   -   .   ID=rna-XR_004601744.1;Parent=gene-LOC117689460;Dbxref=GeneID:117689460,Genbank:XR_004601744.1;Name=XR_004601744.1;gbkey=ncRNA;gene=LOC117689460;model_evidence=Supporting evidence includes similarity to: 3 long SRA reads%2C and 98%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 52 samples with support for all annotated introns;product=uncharacterized LOC117689460;transcript_id=XR_004601744.1    NC_047559.1 Gnomon  gene    167270  168430  .   -   .   ID=gene-LOC117689460;Dbxref=GeneID:117689460;Name=LOC117689460;gbkey=Gene;gene=LOC117689460;gene_biotype=lncRNA
## NC_047559.1  Gnomon  lnc_RNA 226703  229170  .   +   .   ID=rna-XR_004596449.1;Parent=gene-LOC105326952;Dbxref=GeneID:105326952,Genbank:XR_004596449.1;Name=XR_004596449.1;gbkey=ncRNA;gene=LOC105326952;model_evidence=Supporting evidence includes similarity to: 1 long SRA read%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 5 samples with support for all annotated introns;product=uncharacterized LOC105326952;transcript_id=XR_004596449.1 NC_047559.1 Gnomon  gene    226703  229170  .   +   .   ID=gene-LOC105326952;Dbxref=GeneID:105326952;Name=LOC105326952;gbkey=Gene;gene=LOC105326952;gene_biotype=lncRNA
## NC_047559.1  Gnomon  lnc_RNA 242189  242939  .   -   .   ID=rna-XR_004602779.1;Parent=gene-LOC117690921;Dbxref=GeneID:117690921,Genbank:XR_004602779.1;Name=XR_004602779.1;gbkey=ncRNA;gene=LOC117690921;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 46 samples with support for all annotated introns;product=uncharacterized LOC117690921;transcript_id=XR_004602779.1   NC_047559.1 Gnomon  gene    242189  242939  .   -   .   ID=gene-LOC117690921;Dbxref=GeneID:117690921;Name=LOC117690921;gbkey=Gene;gene=LOC117690921;gene_biotype=lncRNA
## NC_047559.1  Gnomon  lnc_RNA 254843  258147  .   +   .   ID=rna-XR_004602775.1;Parent=gene-LOC117690907;Dbxref=GeneID:117690907,Genbank:XR_004602775.1;Name=XR_004602775.1;gbkey=ncRNA;gene=LOC117690907;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 9 samples with support for all annotated introns;product=uncharacterized LOC117690907;transcript_id=XR_004602775.1    NC_047559.1 Gnomon  gene    254843  258147  .   +   .   ID=gene-LOC117690907;Dbxref=GeneID:117690907;Name=LOC117690907;gbkey=Gene;gene=LOC117690907;gene_biotype=lncRNA
## NC_047559.1  Gnomon  lnc_RNA 415528  418254  .   -   .   ID=rna-XR_004598226.1;Parent=gene-LOC117684466;Dbxref=GeneID:117684466,Genbank:XR_004598226.1;Name=XR_004598226.1;gbkey=ncRNA;gene=LOC117684466;model_evidence=Supporting evidence includes similarity to: 2 long SRA reads%2C and 92%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 125 samples with support for all annotated introns;product=uncharacterized LOC117684466%2C transcript variant X1;transcript_id=XR_004598226.1  NC_047559.1 Gnomon  gene    355807  433459  .   +   .   ID=gene-LOC105346550;Dbxref=GeneID:105346550;Name=LOC105346550;gbkey=Gene;gene=LOC105346550;gene_biotype=protein_coding
## NC_047559.1  Gnomon  lnc_RNA 415528  418254  .   -   .   ID=rna-XR_004598226.1;Parent=gene-LOC117684466;Dbxref=GeneID:117684466,Genbank:XR_004598226.1;Name=XR_004598226.1;gbkey=ncRNA;gene=LOC117684466;model_evidence=Supporting evidence includes similarity to: 2 long SRA reads%2C and 92%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 125 samples with support for all annotated introns;product=uncharacterized LOC117684466%2C transcript variant X1;transcript_id=XR_004598226.1  NC_047559.1 Gnomon  gene    415528  421174  .   -   .   ID=gene-LOC117684466;Dbxref=GeneID:117684466;Name=LOC117684466;gbkey=Gene;gene=LOC117684466;gene_biotype=lncRNA
## NC_047559.1  Gnomon  lnc_RNA 415530  421174  .   -   .   ID=rna-XR_004598230.1;Parent=gene-LOC117684466;Dbxref=GeneID:117684466,Genbank:XR_004598230.1;Name=XR_004598230.1;gbkey=ncRNA;gene=LOC117684466;model_evidence=Supporting evidence includes similarity to: 2 long SRA reads%2C and 92%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 6 samples with support for all annotated introns;product=uncharacterized LOC117684466%2C transcript variant X4;transcript_id=XR_004598230.1    NC_047559.1 Gnomon  gene    355807  433459  .   +   .   ID=gene-LOC105346550;Dbxref=GeneID:105346550;Name=LOC105346550;gbkey=Gene;gene=LOC105346550;gene_biotype=protein_coding
## NC_047559.1  Gnomon  lnc_RNA 415530  421174  .   -   .   ID=rna-XR_004598230.1;Parent=gene-LOC117684466;Dbxref=GeneID:117684466,Genbank:XR_004598230.1;Name=XR_004598230.1;gbkey=ncRNA;gene=LOC117684466;model_evidence=Supporting evidence includes similarity to: 2 long SRA reads%2C and 92%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 6 samples with support for all annotated introns;product=uncharacterized LOC117684466%2C transcript variant X4;transcript_id=XR_004598230.1    NC_047559.1 Gnomon  gene    415528  421174  .   -   .   ID=gene-LOC117684466;Dbxref=GeneID:117684466;Name=LOC117684466;gbkey=Gene;gene=LOC117684466;gene_biotype=lncRNA