Mercurial > repos > portiahollyoak > temp
view scripts/TEMP_Insertion.sh @ 21:9672fe07a232 draft default tip
planemo upload for repository https://github.com/portiahollyoak/Tools commit 0fea84d05f8976b8360a8b4943ecb01b87e3ade0-dirty
author | mvdbeek |
---|---|
date | Mon, 05 Dec 2016 09:58:47 -0500 |
parents | e7d3dc3e0ec9 |
children |
line wrap: on
line source
#!/bin/bash -x # TEMP (Transposable Element Movement present in a Population) # 2013-06-14 # Jiali Zhuang(jiali.zhuang@umassmed.edu) # Zhiping Weng Lab # Programs in Bioinformatics and Integrative Biology # University of Massachusetts Medical School #usage function usage() { echo -en "\e[1;36m" cat <<EOF usage: $0 -i input_file.sorted.bam -s scripts_directory -o output_directory -r transposon_database.fa -t annotated_TEs.bed -m MISMATCH -f fragment_size -c CPUs -h TEMP is a software package for detecting transposable elements (TEs) insertions and excisions from pooled high-throughput sequencing data. Please send questions, suggestions and bug reports to: jiali.zhuang@umassmed.edu Options: -i Input file in bam format with full path. Please sort and index the file before calling this program. Sorting and indexing can be done by 'samtools sort' and 'samtools index' -s Directory where all the scripts are -o Path to output directory. Default is current directory -r Transposon sequence database in fasta format with full path -t Annotated TEs in BED6 format with full path. Detected insertions that overlap with annoated TEs will be filtered. -u TE families annotations. If supplied detected insertions overlap with annotated TE of the same family will be filtered. Only use with -t. -m Number of mismatch allowed when mapping to TE concensus sequences. Default is 3 -x The minimum score difference between the best hit and the second best hit for considering a read as uniquely mapped. For BWA mem. -f An integer specifying the length of the fragments (inserts) of the library. Default is 500 -c An integer specifying the number of CPUs used. Default is 8 -h Show help message EOF echo -en "\e[0m" } # taking options while getopts "hi:c:f:m:o:r:s:t:u:x:" OPTION do case $OPTION in h) usage && exit 1 ;; i) BAM=$OPTARG ;; f) INSERT=$OPTARG ;; m) MM=$OPTARG ;; o) OUTDIR=$OPTARG ;; c) CPU=$OPTARG ;; s) BINDIR=$OPTARG ;; r) TESEQ=$OPTARG ;; t) ANNO=$OPTARG ;; u) FAMI=$OPTARG ;; x) SCORE=$OPTARG ;; ?) usage && exit 1 ;; esac done if [[ -z $BAM ]] || [[ -z $BINDIR ]] || [[ -z $TESEQ ]] then usage && exit 1 fi [ ! -z "${CPU##*[!0-9]*}" ] || CPU=8 [ ! -z "${INSERT##*[!0-9]*}" ] || INSERT=500 [ ! -z "${MM##*[!0-9]*}" ] || MM=3 [ ! -z "${SCORE##*[!0-9]*}" ] || SCORE=0 [ ! -z $OUTDIR ] || OUTDIR=$PWD mkdir -p "${OUTDIR}" || echo -e "\e[1;31mWarning: Cannot create directory ${OUTDIR}. Using the direcory of input fastq file\e[0m" cd ${OUTDIR} || echo -e "\e[1;31mError: Cannot access directory ${OUTDIR}... Exiting...\e[0m" || exit 1 touch ${OUTDIR}/.writting_permission && rm -rf ${OUTDIR}/.writting_permission || echo -e "\e[1;31mError: Cannot write in directory ${OUTDIR}... Exiting...\e[0m" || exit 1 function checkExist { echo -ne "\e[1;32m\"${1}\" is using: \e[0m" && which "$1" [[ $? != 0 ]] && echo -e "\e[1;36mError: cannot find software/function ${1}! Please make sure that you have installed the pipeline correctly.\nExiting...\e[0m" && exit 1 } echo -e "\e[1;35mTesting required softwares/scripts:\e[0m" checkExist "echo" checkExist "rm" checkExist "mkdir" checkExist "date" checkExist "mv" checkExist "sort" checkExist "touch" checkExist "awk" checkExist "grep" checkExist "bwa" checkExist "samtools" echo -e "\e[1;35mDone with testing required softwares/scripts, starting pipeline...\e[0m" cp $TESEQ ./ name=`basename $BAM` te=`basename $TESEQ` i=${name/.sorted.bam/} echo $name echo $i if [[ ! -s $name ]] then cp $BAM ./ fi if [[ ! -s $name.bai ]] then cp $BAM.bai ./ fi # Get the mate seq of the uniq-unpaired reads samtools view -F 0x2 $name > $i.unpair.sam if [[ $SCORE -eq 0 ]] then perl $BINDIR/pickUniqPairFastq.pl $i.unpair.sam $i.unpair.uniq perl $BINDIR/pickUniqPos.pl $i.unpair.sam > $i.unpair.uniq.bed else perl $BINDIR/pickUniqPairFastq_MEM.pl $i.unpair.sam $i.unpair.uniq $SCORE perl $BINDIR/pickUniqPos_MEM.pl $i.unpair.sam $SCORE > $i.unpair.uniq.bed fi # Map to transposons bwa index -a is $te bwa aln -t $CPU -n $MM -l 100 -R 1000 $te $i.unpair.uniq.1.fastq > $i.unpair.uniq.1.sai bwa aln -t $CPU -n $MM -l 100 -R 1000 $te $i.unpair.uniq.2.fastq > $i.unpair.uniq.2.sai bwa sampe -P $te $i.unpair.uniq.1.sai $i.unpair.uniq.2.sai $i.unpair.uniq.1.fastq $i.unpair.uniq.2.fastq > $i.unpair.uniq.transposons.sam #Summary samtools view -hSF 0x2 $i.unpair.uniq.transposons.sam > $i.unpair.uniq.transposons.unpair.sam perl $BINDIR/pickUniqMate.pl $i.unpair.uniq.transposons.unpair.sam $i.unpair.uniq.bed > $i.unpair.uniq.transposons.bed cp $i.unpair.uniq.transposons.bed $i.unpair.uniq.transposons.filtered.bed #Prepare for insertion breakpoints identification awk -F "\t" -v sample=$i '{OFS="\t"; print $1,$2,$3,sample,$5,$6}' $i.unpair.uniq.transposons.filtered.bed >> tmp perl $BINDIR/mergeTagsWithoutGap.pl tmp > $i.uniq.transposons.filtered.woGap.bed perl $BINDIR/mergeTagsWithGap.pl $i.uniq.transposons.filtered.woGap.bed $INSERT > $i.uniq.transposons.filtered.wGap.bed rm tmp perl $BINDIR/get_class.pl $i.uniq.transposons.filtered.wGap.bed $i > $i.uniq.transposons.filtered.wGap.class.bed perl $BINDIR/make.bp.bed.pl $i.uniq.transposons.filtered.wGap.class.bed $ANNO $FAMI echo "got FAMI" rm $i.unpair.sam $i.unpair.uniq.bed $i.unpair.uniq.?.fastq $i.unpair.uniq.?.sai rm $i.unpair.uniq.transposons.sam $i.unpair.uniq.transposons.unpair.sam $i.uniq.transposons.filtered.woGap.bed $i.uniq.transposons.filtered.wGap.bed #Detect insertion breakpoints using soft-clipping information perl $BINDIR/pickClippedFastq.pl $i $te perl $BINDIR/refine_breakpoint.in.pl #Estimate insertion frequencies #if [[ $SCORE -eq 0 ]] #then perl $BINDIR/pickOverlapPair.in.pl $i.insertion.refined.bp $INSERT > $i.insertion.refined.bp.summary #else # perl $BINDIR/pickOverlapPair.ex_MEM.pl $i.insertion.refined.bp $INSERT $SCORE > $i.insertion.refined.bp.summary #fi ################################ ##End of processing insertions## ################################