Subsampling single reads (FASTQ):
# Randomly keep k reads (k=10000 here) from single.fastq and write them to
# single_sub.fastq, using reservoir sampling so the input is read only once.
#   stage 1: join every 4-line FASTQ record onto one tab-separated line
#   stage 2: reservoir-sample k of those one-line records
#   stage 3: expand each sampled record back into 4 lines
# Notes:
#   - PROCINFO["pid"] is a gawk extension; it only contributes to the seed.
#   - `for (i in R)` visits the array in an unspecified order, so the sampled
#     reads are not written in their original input order.
awk '{ printf("%s", $0); n++; if (n % 4 == 0) { printf("\n"); } else { printf("\t"); } }' single.fastq |
awk -v k=10000 '
  BEGIN { srand(systime() + PROCINFO["pid"]); }
  {
    # x counts the records seen so far. The first k records fill slots
    # 0..k-1; each later record replaces a random slot with probability k/x.
    s = (x++ < k) ? x - 1 : int(rand() * x);
    if (s < k) R[s] = $0
  }
  END { for (i in R) print R[i] }' |
awk -F"\t" '{ print $1 "\n" $2 "\n" $3 "\n" $4 > "single_sub.fastq" }'
Split single reads into two files, one with the even-numbered and one with the odd-numbered records (FASTQ):
Sample input (1):
@cluster_71:UMI_CCAATA
CAGCTTTGCAACCATACTCCCCCCGGAACCC
+
<?B?DEB::9>@?@C=?:=<<<:===@?=83
@cluster_82:UMI_TTTTTG
GCTTATGTTTTATAAAAATAAGTTGCCCCAG
+
75;6>>>>DEC7?6CC>A=9=>@C1<<9>12
@cluster_83:UMI_CCAGTC
CTCCCACAGTTCTGCGCCAGAGCGGAAGAGC
+
=:?*>4>;>66;C:612661<2322141=:-
@cluster_87:UMI_TGACAC
GGTTGAGCACAGGGTACTTTATTGATGGTAC
+
=@E@B=B::;;A=?A=9DEB9ECB8?B@@97
# Even-numbered records: join each 4-line record of test.fq onto one
# tab-separated line, keep every second joined line, then expand the kept
# records back into 4 lines in even.fq.
awk '{ printf("%s%s", $0, ((NR % 4 == 0) ? "\n" : "\t")) }' test.fq |
  awk '(NR % 2) == 0' |
  awk -F"\t" '{ print $1 "\n" $2 "\n" $3 "\n" $4 > "even.fq" }'
# Odd-numbered records: the same pipeline, keeping the other half in odd.fq.
awk '{ printf("%s%s", $0, ((NR % 4 == 0) ? "\n" : "\t")) }' test.fq |
  awk '(NR % 2) == 1' |
  awk -F"\t" '{ print $1 "\n" $2 "\n" $3 "\n" $4 > "odd.fq" }'
Sample input (2):
@HWI-ST498:135:H2CKMADXY:2:1101:1183:2117 1:N:0:TGACCA
TGTACACACACACGATGATACATATACATATATATTCCTGATATATATATATATGTGTGTGTCCTGATATATATATATATGTGTGTGTGTGTCAGGAATA
+
4:AAADDF?F8<?C@AEA<<FCCHFFEFBEEBDCGDCGFE<F?@DGIA?B9BBGGBDGB88=F)==FC=@=C@E:D@D4=C?EFE?BBD;;BC36;;;;5
@HWI-ST498:135:H2CKMADXY:2:1101:1210:2118 1:N:0:TGACCA
TCCAGCCTGGGCGAAAGAGTGAAACTCCGTCTCAAAAAATAAAAAGAAAAGAAAAGAAAAGAAAAGAAAATTAGGCCGGTGTGGTGGCACACACCTGTAG
+
4=DDFFFHHHHHJIJJJJJGIJJJJJJJJHIJJJJJJJJJJJJIJJIGIJIHHHHFDFFFEDEEDDDDDDDDDDCDDDDDDDDDABDDDDCDDDBDDDDD
@HWI-ST498:135:H2CKMADXY:2:1101:1176:2176 1:N:0:TGACCA
AAGATGGGAGAGGTTAATTAAGGTGCCGGAGGAAGTGGAAGAGGCCACATTCAGTCCTGAGCAGCCTGACTGCAGGATCCGAAGGCTGGTGTGTTTTGCT
+
@@FFFFFHHHFHJFHIJIJJJJJAFHJJGGIIIJIDGIGGIGHIIJJJJIJGCHGAEHHGFFFFFFEECEADDDDBBDDDDDDBDDDDD<CABBDDDDDD
## tr -d '\r' : remove every carriage return (CR) from the input, so files
## with CRLF line endings do not carry a stray CR into the output.
# Afterwards: join each 4-line record onto one tab-separated line, keep the
# odd-numbered records, and expand them back into 4 lines in odd.fq.
tr -d '\r' < test.fq |
  awk '{ printf("%s", $0); n++; if (n % 4 == 0) { printf("\n"); } else { printf("\t"); } }' |
  awk 'NR % 2 == 1' |
  awk -F"\t" '{ print $1 "\n" $2 "\n" $3 "\n" $4 > "odd.fq" }'