#!/bin/bash
# Usage: deinterleave_fastq.sh < interleaved.fastq f.fastq r.fastq [compress]
#
# Deinterleaves a FASTQ file of paired reads into two FASTQ
# files specified on the command line. Optionally GZip compresses the output
# FASTQ files using pigz if the 3rd command line argument is the word "compress"
#
# Can deinterleave 100 million paired reads (200 million total
# reads; a 43Gbyte file), in memory (/dev/shm), in 4m15s (255s)
#
# Latest code: https://gist.github.com/3521724
# Also see my interleaving script: https://gist.github.com/4544979
#
# Inspired by Torsten Seemann's blog post:
# http://thegenomefactory.blogspot.com.au/2012/05/cool-use-of-unix-paste-with-ngs.html
#
# Provenance: build/scripts-3.6/deinterleave_fastq.sh, changeset d0350fe29fdf.
#
# Environment:
#   PIGZ_COMPRESSION_THREADS  number of pigz worker processes (default: 10)
#
# Exit status:
#   0 on success, 2 on a usage error, 1 if "compress" is requested but pigz is
#   not installed, otherwise the status of the failing pipeline stage.

# Make a pipeline fail when any stage fails, not only the last one.
set -o pipefail

# Set up some defaults
GZIP_OUTPUT=0
PIGZ_COMPRESSION_THREADS=${PIGZ_COMPRESSION_THREADS:-10}

#######################################
# Print the usage line to stderr.
#######################################
usage() {
  printf 'Usage: %s < interleaved.fastq f.fastq r.fastq [compress]\n' \
    "${0##*/}" >&2
}

#######################################
# Turn tab-joined records read from stdin back into FASTQ lines for one mate.
# Globals:   GZIP_OUTPUT (read), PIGZ_COMPRESSION_THREADS (read)
# Arguments: $1 - cut field range holding this mate's four lines (e.g. 1-4)
#            $2 - output file path
# Outputs:   writes (optionally pigz-compressed) FASTQ to $2
#######################################
write_mate() {
  local fields=$1
  local out=$2

  if [[ ${GZIP_OUTPUT} == 1 ]]; then
    cut -f "${fields}" \
      | tr '\t' '\n' \
      | pigz --best --processes "${PIGZ_COMPRESSION_THREADS}" > "${out}"
  else
    cut -f "${fields}" | tr '\t' '\n' > "${out}"
  fi
}

#######################################
# Split interleaved FASTQ on stdin into forward and reverse files.
# Globals:   GZIP_OUTPUT (written)
# Arguments: $1 - forward reads output path
#            $2 - reverse reads output path
#            $3 - optional; the literal word "compress" enables pigz output
# Returns:   0 on success, 2 on usage error, 1 if pigz is required but missing
#######################################
main() {
  if (( $# < 2 )) || [[ -z $1 || -z $2 ]]; then
    usage
    return 2
  fi

  # If the third argument is the word "compress" then we'll compress the
  # output using pigz. Any other value is ignored, as before.
  GZIP_OUTPUT=0
  if [[ ${3:-} == "compress" ]]; then
    if ! command -v pigz > /dev/null; then
      printf '%s: pigz is required for "compress" but was not found\n' \
        "${0##*/}" >&2
      return 1
    fi
    GZIP_OUTPUT=1
  fi

  # paste joins every 8 input lines (one read pair) into a single tab-separated
  # record: fields 1-4 are the first read, fields 5-8 the second. tee feeds the
  # same records to both writers. Because tabs are the field separator, an
  # input line that itself contains a tab would shift the fields.
  paste - - - - - - - - \
    | tee >(write_mate 1-4 "$1") \
    | write_mate 5-8 "$2"
}

main "$@"