view macs2npk.sh @ 12:d4a428455057 draft

Uploaded
author modencode-dcc
date Fri, 18 Jan 2013 15:14:23 -0500
parents 72be1ab49f77
children
line wrap: on
line source

#!/bin/bash
# Converts macs xls output to narrowPeak output

# Command Usage: macs2npk.sh INPUTFILE OUTPUTFILE

if [[ "$#" -lt 1 ]]
    then
    echo $(basename $0) 1>&2
    echo "Converts MACS peak caller xls output file to narrowPeak format" 1>&2
    echo "USAGE:" 1>&2
    echo "$(basename $0) <MACSXlsFile> <outputDir>" 1>&2
    exit 1 
fi

MACSFILE=$1
if [[ ! -e ${MACSFILE} ]]
    then
    echo "MACS xls file ${MACSFILE} does not exist" 1>&2
    exit 1
fi
# ODIR=$(dirname ${MACSFILE})
# [[ $# -gt 1 ]] && ODIR=$2
# if [[ ! -d ${ODIR} ]]
#    then
#    echo "Output directory ${ODIR} does not exist" 1>&2
#    exit 1
# fi

# OFILE="${ODIR}/$(echo $(basename ${MACSFILE} '_peaks.xls')).regionPeak.gz"
OFILE="${2}"

# XLS format
# chr start stop length summit tags -10log10(pvalue) fold_enrichment %FDR

# narrowPeak format
# chr start stop name score strand signalValue -log10(pValue) -log10(qValue) summit

# Remove comments #
# Remove empty lines
# Remove header
# Sort by p-value and then rearrange columns
# adjust start coordinates

# Check if header has FDR column
header=$(sed -r -e '/^#/d' -e '/^$/d' "${MACSFILE}" | head -1)
hasFdr=0
echo ${header} | grep -q 'FDR' && hasFDR=1

if [[ ${hasFDR} -eq 1 ]]
    then
    sed -r -e '/^#/d' -e '/^$/d' "${MACSFILE}" | \
	sed 1d | \
	sort -k7nr,7nr | \
	awk '$2 < 1 {$2=1} {printf "%s\t%d\t%d\t%d\t%s\t.\t%s\t%s\t%f\t%d\n",$1,$2-1,$3,NR,$6,$8,$7/10,-log(($9+1e-30)/100)/log(10),$5}' > ${OFILE}
else
    sed -r -e '/^#/d' -e '/^$/d' "${MACSFILE}" | \
	sed 1d | \
	sort -k7nr,7nr | \
	awk '$2 < 1 {$2=1} {printf "%s\t%d\t%d\t%d\t%s\t.\t%s\t%s\t-1\t%d\n",$1,$2-1,$3,NR,$6,$8,$7/10,$5}' > ${OFILE}
fi