annotate macs2npk.sh @ 18:2786794298d1 draft

Uploaded
author modencode-dcc
date Fri, 18 Jan 2013 16:12:21 -0500
parents 72be1ab49f77
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
72be1ab49f77 Uploaded
modencode-dcc
parents:
diff changeset
#!/bin/bash
# Converts MACS peak caller xls output to narrowPeak output.
#
# Command Usage: macs2npk.sh INPUTFILE OUTPUTFILE
#   INPUTFILE   MACS xls peaks file (must exist)
#   OUTPUTFILE  path of the narrowPeak file to write (overwritten if present)
#
# Exit status: 1 on a usage error or a missing input file; otherwise the
# status of the conversion pipeline.

set -euo pipefail

PROG=$(basename -- "$0")

# Both arguments are required: the output path has no default.
if [[ "$#" -lt 2 ]]; then
  echo "${PROG}" 1>&2
  echo "Converts MACS peak caller xls output file to narrowPeak format" 1>&2
  echo "USAGE:" 1>&2
  echo "${PROG} <MACSXlsFile> <outputFile>" 1>&2
  exit 1
fi

MACSFILE=$1
OFILE=$2

if [[ ! -e "${MACSFILE}" ]]; then
  echo "MACS xls file ${MACSFILE} does not exist" 1>&2
  exit 1
fi

# XLS format
# chr start stop length summit tags -10log10(pvalue) fold_enrichment %FDR
#
# narrowPeak format
# chr start stop name score strand signalValue -log10(pValue) -log10(qValue) summit

# Check if header has FDR column. The header is the first line that is
# neither a '#' comment nor empty; sed prints it and quits immediately.
header=$(sed -n -e '/^#/d' -e '/^$/d' -e 'p;q' "${MACSFILE}")
hasFDR=0
if [[ "${header}" == *FDR* ]]; then
  hasFDR=1
fi

# Conversion steps:
#   1. Remove '#' comments and empty lines
#   2. Remove the header line
#   3. Sort by column 7 (-10log10(pvalue)), numerically, descending
#   4. Rearrange columns:
#        name        = rank of the peak after sorting (NR)
#        score       = tags (column 6)
#        signalValue = fold_enrichment (column 8)
#        pValue      = column 7 divided by 10
#        qValue      = -log10((%FDR + 1e-30) / 100) when the FDR column is
#                      present (the 1e-30 avoids log(0)), otherwise -1
#        summit      = column 5
#   5. Adjust start coordinates: start is clamped to at least 1 and then
#      shifted down by one
sed -e '/^#/d' -e '/^$/d' "${MACSFILE}" \
  | sed 1d \
  | sort -k7nr,7nr \
  | awk -v hasFDR="${hasFDR}" '
      $2 < 1 { $2 = 1 }
      {
        if (hasFDR) {
          qval = sprintf("%f", -log(($9 + 1e-30) / 100) / log(10))
        } else {
          qval = "-1"
        }
        printf "%s\t%d\t%d\t%d\t%s\t.\t%s\t%s\t%s\t%d\n",
          $1, $2 - 1, $3, NR, $6, $8, $7 / 10, qval, $5
      }' > "${OFILE}"