0
|
1 #! /bin/sh
|
|
2
|
|
3 # Align bisulfite-converted DNA reads to a genome.
|
|
4
|
|
5 # This assumes that the reads are all from the converted strand
|
|
6 # (i.e. they have C->T conversions, not G->A conversions).
|
|
7
|
|
8 [ $# -gt 1 ] || {
|
|
9 cat <<EOF
|
|
10 Typical usage:
|
|
11
|
|
12 lastdb -uBISF my_f mygenome.fa
|
|
13 lastdb -uBISR my_r mygenome.fa
|
|
14
|
|
15 $(basename $0) my_f my_r reads.fastq > results.maf
|
|
16
|
|
17 EOF
|
|
18 exit 2
|
|
19 }
|
|
20 my_f=$1
|
|
21 my_r=$2
|
|
22 shift 2
|
|
23
|
|
24 # Try to get the LAST programs into the PATH, if they aren't already:
|
|
25 PATH=$PATH:$(dirname $0)/../src
|
|
26
|
|
27 tmp=${TMPDIR-/tmp}/$$
|
|
28 trap 'rm -f $tmp.*' EXIT
|
|
29
|
|
30 # Convert C to t, and all other letters to uppercase:
|
|
31 perl -pe 'y/Cca-z/ttA-Z/ if $. % 4 == 2' "$@" > "$tmp".q
|
|
32
|
|
33 lastal -pBISF -s1 -Q1 -e120 "$my_f" "$tmp".q > "$tmp".f
|
|
34 lastal -pBISR -s0 -Q1 -e120 "$my_r" "$tmp".q > "$tmp".r
|
|
35
|
|
36 last-merge-batches "$tmp".f "$tmp".r | last-split -m0.1 |
|
|
37 perl -F'(\s+)' -ane '$F[12] =~ y/ta/CG/ if /^s/ and $s++ % 2; print @F'
|