Mercurial > repos > portiahollyoak > temp
diff scripts/TEMP_Insertion.sh @ 0:28d1a6f8143f draft
planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
author | portiahollyoak |
---|---|
date | Mon, 25 Apr 2016 13:08:56 -0400 |
parents | |
children | ca36262102d8 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/TEMP_Insertion.sh Mon Apr 25 13:08:56 2016 -0400 @@ -0,0 +1,176 @@ +#!/bin/bash -x +# TEMP (Transposable Element Movement present in a Population) +# 2013-06-14 +# Jiali Zhuang(jiali.zhuang@umassmed.edu) +# Zhiping Weng Lab +# Programs in Bioinformatics and Integrative Biology +# University of Massachusetts Medical School + +#usage function +usage() { +echo -en "\e[1;36m" +cat <<EOF + +usage: $0 -i input_file.sorted.bam -s scripts_directory -o output_directory -r transposon_database.fa -t annotated_TEs.bed -m MISMATCH -f fragment_size -c CPUs -h + +TEMP is a software package for detecting transposable elements (TEs) +insertions and excisions from pooled high-throughput sequencing data. +Please send questions, suggestions and bug reports to: +jiali.zhuang@umassmed.edu + +Options: + -i Input file in bam format with full path. Please sort and index the file before calling this program. + Sorting and indexing can be done by 'samtools sort' and 'samtools index' + -s Directory where all the scripts are + -o Path to output directory. Default is current directory + -r Transposon sequence database in fasta format with full path + -t Annotated TEs in BED6 format with full path. Detected insertions that overlap with annoated TEs will be filtered. + -u TE families annotations. If supplied detected insertions overlap with annotated TE of the same family will be filtered. Only use with -t. + -m Number of mismatch allowed when mapping to TE concensus sequences. Default is 3 + -x The minimum score difference between the best hit and the second best hit for considering a read as uniquely mapped. For BWA mem. + -f An integer specifying the length of the fragments (inserts) of the library. Default is 500 + -c An integer specifying the number of CUPs used. Default is 8 + -h Show help message + +EOF +echo -en "\e[0m" +} + +# taking options +while getopts "hi:c:f:m:o:r:s:t:u:x:" OPTION +do + case $OPTION in + h) + usage && exit 1 + ;; + i) + BAM=$OPTARG + ;; + f) + INSERT=$OPTARG + ;; + m) + MM=$OPTARG + ;; + o) + OUTDIR=$OPTARG + ;; + c) + CPU=$OPTARG + ;; + s) + BINDIR=$OPTARG + ;; + r) + TESEQ=$OPTARG + ;; + t) + ANNO=$OPTARG + ;; + u) + FAMI=$OPTARG + ;; + x) + SCORE=$OPTARG + ;; + ?) + usage && exit 1 + ;; + esac +done + +if [[ -z $BAM ]] || [[ -z $BINDIR ]] || [[ -z $TESEQ ]] +then + usage && exit 1 +fi +[ ! -z "${CPU##*[!0-9]*}" ] || CPU=8 +[ ! -z "${INSERT##*[!0-9]*}" ] || INSERT=500 +[ ! -z "${MM##*[!0-9]*}" ] || MM=3 +[ ! -z "${SCORE##*[!0-9]*}" ] || SCORE=0 +[ ! -z $OUTDIR ] || OUTDIR=$PWD + +mkdir -p "${OUTDIR}" || echo -e "\e[1;31mWarning: Cannot create directory ${OUTDIR}. Using the direcory of input fastq file\e[0m" +cd ${OUTDIR} || echo -e "\e[1;31mError: Cannot access directory ${OUTDIR}... Exiting...\e[0m" || exit 1 +touch ${OUTDIR}/.writting_permission && rm -rf ${OUTDIR}/.writting_permission || echo -e "\e[1;31mError: Cannot write in directory ${OUTDIR}... Exiting...\e[0m" || exit 1 + +function checkExist { + echo -ne "\e[1;32m\"${1}\" is using: \e[0m" && which "$1" + [[ $? != 0 ]] && echo -e "\e[1;36mError: cannot find software/function ${1}! Please make sure that you have installed the pipeline correctly.\nExiting...\e[0m" && exit 1 +} +echo -e "\e[1;35mTesting required softwares/scripts:\e[0m" +checkExist "echo" +checkExist "rm" +checkExist "mkdir" +checkExist "date" +checkExist "mv" +checkExist "sort" +checkExist "touch" +checkExist "awk" +checkExist "grep" +checkExist "bwa" +checkExist "samtools" +echo -e "\e[1;35mDone with testing required softwares/scripts, starting pipeline...\e[0m" + +cp $TESEQ ./ +name=`basename $BAM` +te=`basename $TESEQ` +i=${name/.sorted.bam/} +echo $name +echo $i +if [[ ! -s $name ]] +then + cp $BAM ./ +fi +if [[ ! -s $name.bai ]] +then cp $BAM.bai ./ +fi + +# Get the mate seq of the uniq-unpaired reads +samtools view -XF 0x2 $name > $i.unpair.sam +if [[ $SCORE -eq 0 ]] +then + perl $BINDIR/pickUniqPairFastq.pl $i.unpair.sam $i.unpair.uniq + perl $BINDIR/pickUniqPos.pl $i.unpair.sam > $i.unpair.uniq.bed +else + perl $BINDIR/pickUniqPairFastq_MEM.pl $i.unpair.sam $i.unpair.uniq $SCORE + perl $BINDIR/pickUniqPos_MEM.pl $i.unpair.sam $SCORE > $i.unpair.uniq.bed +fi + +# Map to transposons +bwa index -a is $te +bwa aln -t $CPU -n $MM -l 100 -R 1000 $te $i.unpair.uniq.1.fastq > $i.unpair.uniq.1.sai +bwa aln -t $CPU -n $MM -l 100 -R 1000 $te $i.unpair.uniq.2.fastq > $i.unpair.uniq.2.sai +bwa sampe -P $te $i.unpair.uniq.1.sai $i.unpair.uniq.2.sai $i.unpair.uniq.1.fastq $i.unpair.uniq.2.fastq > $i.unpair.uniq.transposons.sam + + +#Summary +samtools view -hSXF 0x2 $i.unpair.uniq.transposons.sam > $i.unpair.uniq.transposons.unpair.sam +perl $BINDIR/pickUniqMate.pl $i.unpair.uniq.transposons.unpair.sam $i.unpair.uniq.bed > $i.unpair.uniq.transposons.bed +cp $i.unpair.uniq.transposons.bed $i.unpair.uniq.transposons.filtered.bed + + +#Prepare for insertion breakpoints identification +awk -F "\t" -v sample=$i '{OFS="\t"; print $1,$2,$3,sample,$5,$6}' $i.unpair.uniq.transposons.filtered.bed >> tmp +perl $BINDIR/mergeTagsWithoutGap.pl tmp > $i.uniq.transposons.filtered.woGap.bed +perl $BINDIR/mergeTagsWithGap.pl $i.uniq.transposons.filtered.woGap.bed $INSERT > $i.uniq.transposons.filtered.wGap.bed +rm tmp +perl $BINDIR/get_class.pl $i.uniq.transposons.filtered.wGap.bed $i > $i.uniq.transposons.filtered.wGap.class.bed +perl $BINDIR/make.bp.bed.pl $i.uniq.transposons.filtered.wGap.class.bed $ANNO $FAMI + +#rm $i.unpair.sam $i.unpair.uniq.bed $i.unpair.uniq.?.fastq $i.unpair.uniq.?.sai +rm $i.unpair.uniq.transposons.sam $i.unpair.uniq.transposons.unpair.sam $i.uniq.transposons.filtered.woGap.bed $i.uniq.transposons.filtered.wGap.bed + + +#Detect insertion breakpoints using soft-clipping information +perl $BINDIR/pickClippedFastq.pl $i $te +perl $BINDIR/refine_breakpoint.in.pl + + +#Estimate insertion frequencies +perl $BINDIR/pickOverlapPair.in.pl $i.insertion.refined.bp $INSERT > $i.insertion.refined.bp.summary + + +################################ +##End of processing insertions## +################################ +