0
|
1 #! /bin/sh
|
|
2
|
|
3 # Align paired bisulfite-converted DNA reads to a genome.
|
|
4
|
|
5 # This assumes that reads1.fastq are all from the converted strand
|
|
6 # (i.e. they have C->T conversions) and reads2.fastq are all from the
|
|
7 # reverse-complement (i.e. they have G->A conversions).
|
|
8
|
|
9 # "GNU parallel" needs to be installed.
|
|
10
|
|
11 [ $# -eq 4 ] || {
|
|
12 cat <<EOF
|
|
13 Typical usage:
|
|
14
|
|
15 lastdb -uBISF my_f mygenome.fa
|
|
16 lastdb -uBISR my_r mygenome.fa
|
|
17
|
|
18 $(basename $0) my_f my_r reads1.fastq reads2.fastq > results.maf
|
|
19
|
|
20 EOF
|
|
21 exit 2
|
|
22 }
|
|
23
|
|
24 # Try to get the LAST programs into the PATH, if they aren't already:
|
|
25 PATH=$PATH:$(dirname $0)/../src
|
|
26
|
|
27 tmp=${TMPDIR-/tmp}/$$
|
|
28 trap 'rm -f $tmp.*' EXIT
|
|
29
|
|
30 cat > $tmp.script << 'EOF'
|
|
31 t=$1.$$
|
|
32
|
|
33 lastal -pBISF -s1 -Q1 -e120 -i1 "$2" "$4" > $t.t1f
|
|
34 lastal -pBISR -s0 -Q1 -e120 -i1 "$3" "$4" > $t.t1r
|
|
35 last-merge-batches $t.t1f $t.t1r > $t.t1
|
|
36 rm $t.t1f $t.t1r
|
|
37
|
|
38 lastal -pBISF -s0 -Q1 -e120 -i1 "$2" "$5" > $t.t2f
|
|
39 lastal -pBISR -s1 -Q1 -e120 -i1 "$3" "$5" > $t.t2r
|
|
40 last-merge-batches $t.t2f $t.t2r > $t.t2
|
|
41 rm $t.t2f $t.t2r
|
|
42
|
|
43 last-pair-probs -m0.1 $t.t1 $t.t2 |
|
|
44 perl -F'(\s+)' -ane '$F[12] =~ y/ta/CG/ if /^s/ and $s++ % 2; print @F'
|
|
45 rm $t.t1 $t.t2
|
|
46 EOF
|
|
47
|
|
48 # Convert C to t, and all other letters to uppercase:
|
|
49 perl -pe 'y/Cca-z/ttA-Z/ if $. % 4 == 2' "$3" | split -l400000 -a5 - $tmp.1
|
|
50
|
|
51 # Convert G to a, and all other letters to uppercase:
|
|
52 perl -pe 'y/Gga-z/aaA-Z/ if $. % 4 == 2' "$4" | split -l400000 -a5 - $tmp.2
|
|
53
|
|
54 parallel --gnu --xapply sh $tmp.script $tmp "$1" "$2" ::: $tmp.1* ::: $tmp.2*
|