Mercurial > repos > portiahollyoak > temp
comparison scripts/TEMP_Absence.sh @ 0:28d1a6f8143f draft
planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
author | portiahollyoak |
---|---|
date | Mon, 25 Apr 2016 13:08:56 -0400 |
parents | |
children | ca36262102d8 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:28d1a6f8143f |
---|---|
1 #!/bin/bash -x | |
2 # TEMP (Transposable Element Movement present in a Population) | |
3 # 2013-06-14 | |
4 # Jiali Zhuang(jiali.zhuang@umassmed.edu) | |
5 # Zhiping Weng Lab | |
6 # Programs in Bioinformatics and Integrative Biology | |
7 # University of Massachusetts Medical School | |
8 | |
9 #usage function | |
10 usage() { | |
11 echo -en "\e[1;36m" | |
12 cat <<EOF | |
13 | |
14 usage: $0 -i input_file.sorted.bam -s scripts_directory -o output_directory -r transposon_rpmk.bed -t reference.2bit -f fragment_size -c CPUs -h | |
15 | |
16 TEMP is a software package for detecting transposable elements (TEs) | |
17 insertions and excisions from pooled high-throughput sequencing data. | |
18 Please send questions, suggestions and bug reports to: | |
19 jiali.zhuang@umassmed.edu | |
20 | |
21 Options: | |
22 -i Input file in bam format with full path. Please sort and index the file before calling this program. | |
23 Sorting and indexing can be done by 'samtools sort' and 'samtools index' | |
24 -s Directory where all the scripts are | |
25 -o Path to output directory. Default is current directory | |
26 -r Annotated transposon positions in the genome (e.g., repeakMask) in bed6 format with full path | |
27 -t 2bit file for the reference genome (can be downloaded from UCSC Genome Browser) | |
28 -f An integer specifying the length of the fragments (inserts) of the library. Default is 500 | |
29 -c An integer specifying the number of CUPs used. Default is 4 | |
30 -h Show help message | |
31 | |
32 EOF | |
33 echo -en "\e[0m" | |
34 } | |
35 | |
36 # taking options | |
37 while getopts "hi:c:f:o:r:s:t:" OPTION | |
38 do | |
39 case $OPTION in | |
40 h) | |
41 usage && exit 1 | |
42 ;; | |
43 i) | |
44 BAM=$OPTARG | |
45 ;; | |
46 f) | |
47 INSERT=$OPTARG | |
48 ;; | |
49 o) | |
50 OUTDIR=$OPTARG | |
51 ;; | |
52 c) | |
53 CPU=$OPTARG | |
54 ;; | |
55 s) | |
56 BINDIR=$OPTARG | |
57 ;; | |
58 r) | |
59 TEBED=$OPTARG | |
60 ;; | |
61 t) | |
62 REF=$OPTARG | |
63 ;; | |
64 ?) | |
65 usage && exit 1 | |
66 ;; | |
67 esac | |
68 done | |
69 | |
70 if [[ -z $BAM ]] || [[ -z $BINDIR ]] || [[ -z $TEBED ]] || [[ -z $REF ]] | |
71 then | |
72 usage && exit 1 | |
73 fi | |
74 [ ! -z "${CPU##*[!0-9]*}" ] || CPU=4 | |
75 [ ! -z "${INSERT##*[!0-9]*}" ] || INSERT=500 | |
76 [ ! -z $OUTDIR ] || OUTDIR=$PWD | |
77 | |
78 mkdir -p "${OUTDIR}" || echo -e "\e[1;31mWarning: Cannot create directory ${OUTDIR}. Using the direcory of input fastq file\e[0m" | |
79 cd ${OUTDIR} || echo -e "\e[1;31mError: Cannot access directory ${OUTDIR}... Exiting...\e[0m" || exit 1 | |
80 touch ${OUTDIR}/.writting_permission && rm -rf ${OUTDIR}/.writting_permission || echo -e "\e[1;31mError: Cannot write in directory ${OUTDIR}... Exiting...\e[0m" || exit 1 | |
81 | |
82 function checkExist { | |
83 echo -ne "\e[1;32m\"${1}\" is using: \e[0m" && which "$1" | |
84 [[ $? != 0 ]] && echo -e "\e[1;36mError: cannot find software/function ${1}! Please make sure that you have installed the pipeline correctly.\nExiting...\e[0m" && exit 1 | |
85 } | |
86 echo -e "\e[1;35mTesting required softwares/scripts:\e[0m" | |
87 checkExist "echo" | |
88 checkExist "rm" | |
89 checkExist "mkdir" | |
90 checkExist "date" | |
91 checkExist "mv" | |
92 checkExist "sort" | |
93 checkExist "touch" | |
94 checkExist "awk" | |
95 checkExist "grep" | |
96 checkExist "bwa" | |
97 checkExist "samtools" | |
98 echo -e "\e[1;35mDone with testing required softwares/scripts, starting pipeline...\e[0m" | |
99 | |
100 name=`basename $BAM` | |
101 i=${name/.sorted.bam/} | |
102 echo $name | |
103 echo $i | |
104 if [[ ! -s $name ]] | |
105 then | |
106 cp $BAM ./ | |
107 fi | |
108 if [[ ! -s $name.bai ]] | |
109 then cp $BAM.bai ./ | |
110 fi | |
111 | |
112 #Detect excision sites | |
113 samtools view -XF 0x2 $name > $i.unpair.sam | |
114 awk -F "\t" '{OFS="\t"; if ($9 != 0) print $0}' $i.unpair.sam > temp1.sam | |
115 perl $BINDIR/pickUniqIntervalPos.pl temp1.sam $INSERT > $i.unproper.uniq.interval.bed | |
116 | |
117 rm temp1.sam $i.unpair.sam | |
118 | |
119 # Sometimes $i.unproper.uniq.interval.bed contains malformed bed entries | |
120 # These must be removed to prevent the script failing | |
121 awk '{if ($3>=$2 && $3 > 0 && $2 > 0) print $0}' $i.unproper.uniq.interval.bed > $i.unproper.uniq.interval.fixed.bed | |
122 mv $i.unproper.uniq.interval.fixed.bed $i.unproper.uniq.interval.bed | |
123 | |
124 # Map to transposons | |
125 bedtools intersect -a $TEBED -b $i.unproper.uniq.interval.bed -f 1.0 -wo > temp | |
126 perl $BINDIR/filterFalsePositive.ex.pl temp $INSERT $i.final.pairs.rpmk.bed | |
127 bedtools intersect -a $TEBED -b $i.final.pairs.rpmk.bed -f 1.0 -wo > temp2 | |
128 | |
129 perl $BINDIR/excision.clustering.pl temp2 $i.excision.cluster.rpmk | |
130 rm temp temp2 $i.unproper.uniq.interval.bed $i.final.pairs.rpmk.bed | |
131 | |
132 # Identify breakpoints using soft-clipping information | |
133 perl $BINDIR/pickSoftClipping.over.pl $i.excision.cluster.rpmk $REF > $i.excision.cluster.rpmk.sfcp | |
134 perl $BINDIR/refine_breakpoint.ex.pl | |
135 | |
136 # Estimate excision sites frequencies | |
137 perl $BINDIR/pickOverlapPair.ex.pl $i.excision.cluster.rpmk.refined.bp > $i.excision.cluster.rpmk.refined.bp.refsup | |
138 perl $BINDIR/summarize_excision.pl |