comparison scripts/TEMP_Absence.sh @ 0:28d1a6f8143f draft

planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
author portiahollyoak
date Mon, 25 Apr 2016 13:08:56 -0400
parents
children ca36262102d8
comparison
equal deleted inserted replaced
-1:000000000000 0:28d1a6f8143f
1 #!/bin/bash -x
2 # TEMP (Transposable Element Movement present in a Population)
3 # 2013-06-14
4 # Jiali Zhuang(jiali.zhuang@umassmed.edu)
5 # Zhiping Weng Lab
6 # Programs in Bioinformatics and Integrative Biology
7 # University of Massachusetts Medical School
8
9 #usage function
10 usage() {
11 echo -en "\e[1;36m"
12 cat <<EOF
13
14 usage: $0 -i input_file.sorted.bam -s scripts_directory -o output_directory -r transposon_rpmk.bed -t reference.2bit -f fragment_size -c CPUs -h
15
16 TEMP is a software package for detecting transposable elements (TEs)
17 insertions and excisions from pooled high-throughput sequencing data.
18 Please send questions, suggestions and bug reports to:
19 jiali.zhuang@umassmed.edu
20
21 Options:
22 -i Input file in bam format with full path. Please sort and index the file before calling this program.
23 Sorting and indexing can be done by 'samtools sort' and 'samtools index'
24 -s Directory where all the scripts are
25 -o Path to output directory. Default is current directory
26 -r Annotated transposon positions in the genome (e.g., repeakMask) in bed6 format with full path
27 -t 2bit file for the reference genome (can be downloaded from UCSC Genome Browser)
28 -f An integer specifying the length of the fragments (inserts) of the library. Default is 500
29 -c An integer specifying the number of CUPs used. Default is 4
30 -h Show help message
31
32 EOF
33 echo -en "\e[0m"
34 }
35
36 # taking options
37 while getopts "hi:c:f:o:r:s:t:" OPTION
38 do
39 case $OPTION in
40 h)
41 usage && exit 1
42 ;;
43 i)
44 BAM=$OPTARG
45 ;;
46 f)
47 INSERT=$OPTARG
48 ;;
49 o)
50 OUTDIR=$OPTARG
51 ;;
52 c)
53 CPU=$OPTARG
54 ;;
55 s)
56 BINDIR=$OPTARG
57 ;;
58 r)
59 TEBED=$OPTARG
60 ;;
61 t)
62 REF=$OPTARG
63 ;;
64 ?)
65 usage && exit 1
66 ;;
67 esac
68 done
69
70 if [[ -z $BAM ]] || [[ -z $BINDIR ]] || [[ -z $TEBED ]] || [[ -z $REF ]]
71 then
72 usage && exit 1
73 fi
74 [ ! -z "${CPU##*[!0-9]*}" ] || CPU=4
75 [ ! -z "${INSERT##*[!0-9]*}" ] || INSERT=500
76 [ ! -z $OUTDIR ] || OUTDIR=$PWD
77
78 mkdir -p "${OUTDIR}" || echo -e "\e[1;31mWarning: Cannot create directory ${OUTDIR}. Using the direcory of input fastq file\e[0m"
79 cd ${OUTDIR} || echo -e "\e[1;31mError: Cannot access directory ${OUTDIR}... Exiting...\e[0m" || exit 1
80 touch ${OUTDIR}/.writting_permission && rm -rf ${OUTDIR}/.writting_permission || echo -e "\e[1;31mError: Cannot write in directory ${OUTDIR}... Exiting...\e[0m" || exit 1
81
82 function checkExist {
83 echo -ne "\e[1;32m\"${1}\" is using: \e[0m" && which "$1"
84 [[ $? != 0 ]] && echo -e "\e[1;36mError: cannot find software/function ${1}! Please make sure that you have installed the pipeline correctly.\nExiting...\e[0m" && exit 1
85 }
86 echo -e "\e[1;35mTesting required softwares/scripts:\e[0m"
87 checkExist "echo"
88 checkExist "rm"
89 checkExist "mkdir"
90 checkExist "date"
91 checkExist "mv"
92 checkExist "sort"
93 checkExist "touch"
94 checkExist "awk"
95 checkExist "grep"
96 checkExist "bwa"
97 checkExist "samtools"
98 echo -e "\e[1;35mDone with testing required softwares/scripts, starting pipeline...\e[0m"
99
100 name=`basename $BAM`
101 i=${name/.sorted.bam/}
102 echo $name
103 echo $i
104 if [[ ! -s $name ]]
105 then
106 cp $BAM ./
107 fi
108 if [[ ! -s $name.bai ]]
109 then cp $BAM.bai ./
110 fi
111
112 #Detect excision sites
113 samtools view -XF 0x2 $name > $i.unpair.sam
114 awk -F "\t" '{OFS="\t"; if ($9 != 0) print $0}' $i.unpair.sam > temp1.sam
115 perl $BINDIR/pickUniqIntervalPos.pl temp1.sam $INSERT > $i.unproper.uniq.interval.bed
116
117 rm temp1.sam $i.unpair.sam
118
119 # Sometimes $i.unproper.uniq.interval.bed contains malformed bed entries
120 # These must be removed to prevent the script failing
121 awk '{if ($3>=$2 && $3 > 0 && $2 > 0) print $0}' $i.unproper.uniq.interval.bed > $i.unproper.uniq.interval.fixed.bed
122 mv $i.unproper.uniq.interval.fixed.bed $i.unproper.uniq.interval.bed
123
124 # Map to transposons
125 bedtools intersect -a $TEBED -b $i.unproper.uniq.interval.bed -f 1.0 -wo > temp
126 perl $BINDIR/filterFalsePositive.ex.pl temp $INSERT $i.final.pairs.rpmk.bed
127 bedtools intersect -a $TEBED -b $i.final.pairs.rpmk.bed -f 1.0 -wo > temp2
128
129 perl $BINDIR/excision.clustering.pl temp2 $i.excision.cluster.rpmk
130 rm temp temp2 $i.unproper.uniq.interval.bed $i.final.pairs.rpmk.bed
131
132 # Identify breakpoints using soft-clipping information
133 perl $BINDIR/pickSoftClipping.over.pl $i.excision.cluster.rpmk $REF > $i.excision.cluster.rpmk.sfcp
134 perl $BINDIR/refine_breakpoint.ex.pl
135
136 # Estimate excision sites frequencies
137 perl $BINDIR/pickOverlapPair.ex.pl $i.excision.cluster.rpmk.refined.bp > $i.excision.cluster.rpmk.refined.bp.refsup
138 perl $BINDIR/summarize_excision.pl