Mercurial > repos > cstrittmatter > ss2v110
annotate deinterleave_fastq.sh @ 9:43f6b7f6ebb3 draft
planemo upload commit c50df40caef2fb97c178d6890961e0e527992324-dirty
author | cstrittmatter |
---|---|
date | Thu, 30 Apr 2020 21:47:42 -0400 |
parents | fc22ec8e924e |
children |
rev | line source |
---|---|
0
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
1 #!/bin/bash |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
2 # Usage: deinterleave_fastq.sh < interleaved.fastq f.fastq r.fastq [compress] |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
3 # |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
4 # Deinterleaves a FASTQ file of paired reads into two FASTQ |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
5 # files specified on the command line. Optionally GZip compresses the output |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
6 # FASTQ files using pigz if the 3rd command line argument is the word "compress" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
7 # |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
8 # Can deinterleave 100 million paired reads (200 million total |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
9 # reads; a 43Gbyte file), in memory (/dev/shm), in 4m15s (255s) |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
10 # |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
11 # Latest code: https://gist.github.com/3521724 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
12 # Also see my interleaving script: https://gist.github.com/4544979 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
13 # |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
14 # Inspired by Torsten Seemann's blog post: |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
15 # http://thegenomefactory.blogspot.com.au/2012/05/cool-use-of-unix-paste-with-ngs.html |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
16 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
17 # Set up some defaults |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
18 GZIP_OUTPUT=0 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
19 PIGZ_COMPRESSION_THREADS=10 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
20 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
21 # If the third argument is the word "compress" then we'll compress the output using pigz |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
22 if [[ $3 == "compress" ]]; then |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
23 GZIP_OUTPUT=1 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
24 fi |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
25 |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
26 if [[ ${GZIP_OUTPUT} == 0 ]]; then |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
27 paste - - - - - - - - | tee >(cut -f 1-4 | tr "\t" "\n" > "$1") | cut -f 5-8 | tr "\t" "\n" > "$2" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
28 else |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
29 paste - - - - - - - - | tee >(cut -f 1-4 | tr "\t" "\n" | pigz --best --processes "${PIGZ_COMPRESSION_THREADS}" > "$1") | cut -f 5-8 | tr "\t" "\n" | pigz --best --processes "${PIGZ_COMPRESSION_THREADS}" > "$2" |
fc22ec8e924e
planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
cstrittmatter
parents:
diff
changeset
|
30 fi |