# Download the cbai_genome_v1.01 FASTA into the data directory, then list the
# directory to confirm the file arrived.
# `&&` stops curl from writing into the wrong directory if the cd fails.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page under the FASTA file name.
cd ../data &&
  curl --fail -O https://owl.fish.washington.edu/halfshell/genomic-databank/cbai_genome_v1.01.fasta
ls ../data
##   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
##                                  Dload  Upload   Total   Spent    Left  Speed
## 
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  2 18.1M    2  496k    0     0  1042k      0  0:00:17 --:--:--  0:00:17 1040k
 35 18.1M   35 6629k    0     0  4479k      0  0:00:04  0:00:01  0:00:03 4476k
100 18.1M  100 18.1M    0     0  8056k      0  0:00:02  0:00:02 --:--:-- 8056k
## 06_tview_screengrab.jpeg
## 19F_R1_val_1_bismark_bt2_pe.deduplicated.sorted.bam
## 19F_R1_val_1_bismark_bt2_pe.deduplicated.sorted.bam.bai
## Ab_4denovo_CLC6_a.fa
## cbai_genome_v1.01.fasta
## cbai_genome_v1.01.fasta.fai
## cgigas_roslin_rna.index
## cgigas_uk_roslin_v1_genomic-mito.fa
## cgigas_uk_roslin_v1_genomic-mito.fa.fai
## D54_S145_L001_R1_001.fastq.gz
## D54_S145_L002_R1_001.fastq.gz
## D55_S146_L001_R1_001.fastq.gz
## D55_S146_L002_R1_001.fastq.gz
## D56_S136_L001_R1_001.fastq.gz
## D57_S143_L001_R1_001.fastq.gz
## D58_S144_L001_R1_001.fastq.gz
## D59_S142_L001_R1_001.fastq.gz
## F143n08_R1_001.fastq.gz
## F143n08_R2_001.fastq.gz
## GCF_002022765.2_C_virginica-3.0_genomic.fa
## GCF_002022765.2_C_virginica-3.0_genomic.fa.fai
## GCF_902806645.1_cgigas_uk_roslin_v1_genomic-mito.gtf
## M45_S140_L001_R1_001.fastq.gz
## M46_S141_L001_R1_001.fastq.gz
## M48_S137_L001_R1_001.fastq.gz
## M49_S139_L001_R1_001.fastq.gz
## M89_S138_L001_R1_001.fastq.gz
## M90_S147_L001_R1_001.fastq.gz
## N48_S194_L001_R1_001.fastq.gz
## N49_S185_L001_R1_001.fastq.gz
## N50_S187_L001_R1_001.fastq.gz
## N51_S186_L001_R1_001.fastq.gz
## N52_S184_L001_R1_001.fastq.gz
## N53_S188_L001_R1_001.fastq.gz
## N54_S193_L001_R1_001.fastq.gz
## N55_S190_L001_R1_001.fastq.gz
## N56_S192_L001_R1_001.fastq.gz
## N57_S191_L001_R1_001.fastq.gz
## N58_S195_L001_R1_001.fastq.gz
## N59_S189_L001_R1_001.fastq.gz
## rna.fna
library(seqinr)

# Subsample the genome: draw a reproducible random set of sequences from the
# input FASTA and write them to a smaller FASTA used by the later steps.
input_file <- "../data/cbai_genome_v1.01.fasta"
output_file <- "../output/07-output.fa"
number_of_sequences_to_select <- 10

# Fail early with a clear message rather than somewhere inside read.fasta().
if (!file.exists(input_file)) {
  stop("Input FASTA not found: ", input_file, call. = FALSE)
}

sequences <- read.fasta(input_file)

# Fix the RNG seed so the same sequences are drawn on every run.
set.seed(42)

# Cap the request at the number of sequences actually present. The message is
# built from the variable so it stays correct if the requested count changes.
if (length(sequences) < number_of_sequences_to_select) {
  warning(
    "There are fewer than ", number_of_sequences_to_select,
    " sequences in the fasta file. All sequences will be selected.",
    call. = FALSE
  )
  number_of_sequences_to_select <- length(sequences)
}

# Draw distinct indices (sampling without replacement) and subset the list.
selected_indices <- sample.int(length(sequences), number_of_sequences_to_select)
selected_sequences <- sequences[selected_indices]

# Make sure the output directory exists before writing; opening the output
# file fails when its parent directory is missing.
dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE)
write.fasta(selected_sequences, names(selected_sequences), output_file, open = "w")
# download index file (.fai) for the genome FASTA into the data directory,
# then list the directory to confirm the file arrived.
# `&&` stops curl from writing into the wrong directory if the cd fails.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page under the index file name.
cd ../data &&
  curl --fail -O https://owl.fish.washington.edu/halfshell/genomic-databank/cbai_genome_v1.01.fasta.fai
ls ../data
##   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
##                                  Dload  Upload   Total   Spent    Left  Speed
## 
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 76608  100 76608    0     0   959k      0 --:--:-- --:--:-- --:--:--  959k
## 06_tview_screengrab.jpeg
## 19F_R1_val_1_bismark_bt2_pe.deduplicated.sorted.bam
## 19F_R1_val_1_bismark_bt2_pe.deduplicated.sorted.bam.bai
## Ab_4denovo_CLC6_a.fa
## cbai_genome_v1.01.fasta
## cbai_genome_v1.01.fasta.fai
## cgigas_roslin_rna.index
## cgigas_uk_roslin_v1_genomic-mito.fa
## cgigas_uk_roslin_v1_genomic-mito.fa.fai
## D54_S145_L001_R1_001.fastq.gz
## D54_S145_L002_R1_001.fastq.gz
## D55_S146_L001_R1_001.fastq.gz
## D55_S146_L002_R1_001.fastq.gz
## D56_S136_L001_R1_001.fastq.gz
## D57_S143_L001_R1_001.fastq.gz
## D58_S144_L001_R1_001.fastq.gz
## D59_S142_L001_R1_001.fastq.gz
## F143n08_R1_001.fastq.gz
## F143n08_R2_001.fastq.gz
## GCF_002022765.2_C_virginica-3.0_genomic.fa
## GCF_002022765.2_C_virginica-3.0_genomic.fa.fai
## GCF_902806645.1_cgigas_uk_roslin_v1_genomic-mito.gtf
## M45_S140_L001_R1_001.fastq.gz
## M46_S141_L001_R1_001.fastq.gz
## M48_S137_L001_R1_001.fastq.gz
## M49_S139_L001_R1_001.fastq.gz
## M89_S138_L001_R1_001.fastq.gz
## M90_S147_L001_R1_001.fastq.gz
## N48_S194_L001_R1_001.fastq.gz
## N49_S185_L001_R1_001.fastq.gz
## N50_S187_L001_R1_001.fastq.gz
## N51_S186_L001_R1_001.fastq.gz
## N52_S184_L001_R1_001.fastq.gz
## N53_S188_L001_R1_001.fastq.gz
## N54_S193_L001_R1_001.fastq.gz
## N55_S190_L001_R1_001.fastq.gz
## N56_S192_L001_R1_001.fastq.gz
## N57_S191_L001_R1_001.fastq.gz
## N58_S195_L001_R1_001.fastq.gz
## N59_S189_L001_R1_001.fastq.gz
## rna.fna
# Build the samtools FASTA index for the subsampled sequences; needed downstream for IGV.
/home/shared/samtools-1.12/samtools faidx ../output/07-output.fa
# Search the subsampled sequences for the CG pattern and write the hits as GFF.
fuzznuc \
  -sequence ../output/07-output.fa \
  -pattern CG \
  -rformat gff \
  -outfile ../output/CGoutput.gff
## Search for patterns in nucleotide sequences

IGV screenshot of CG motifs

IGV screenshot of CG motifs