view scripts/to_get_Y_variants.sh @ 0:05c27700e5ca

initial commit
author biomonika <biomonika@psu.edu>
date Thu, 04 Sep 2014 18:24:19 -0400
parents
children
line wrap: on
line source

#!/bin/bash
set -e;

#read config variables
echo "Reading config...." >&2
#echo `ls -l`
computer_name=`hostname`;
if [ $computer_name == "misa" ]; then
	#local_computer
	source config_devel;
else 
	#metacentrum machine
	source config_meta;
fi

#files needed: reference_male.fasta $reads_1 $reads_2

reference=$1;
reads_1="input/$2"; echo "reads_1: " $reads_1;
reads_2="input/$3"; echo "reads_2: " $reads_2;
name=$4;
fragments=$5;

#INDEXING FILES

if [ -e "references/reference_male.dict" ];
then
	echo "Reference exists. Not being created again.";
else
	echo "seconds of run: $SECONDS";
	java -jar $CreateSequenceDictionary R=$reference O=references/reference_male.dict >mlogfile 2>>errlogfile;
	echo "0/9 INDEXING FILES successful";
	echo "seconds of run: $SECONDS";
fi

if [ -e "references/reference_male.fai" ];
then
	echo "Samtools reference exists. Not being created again.";
else
	echo "seconds of run: $SECONDS";
	echo "reference:"$reference;
	samtools faidx $reference 2>>errlogfile;
	echo "0/9 INDEXING FILES successful";
	echo "seconds of run: $SECONDS";
fi

#1 MAPPING WITH BWA
bwa index $reference >>mlogfile 2>>errlogfile;

#differs based on the fact whether reads are single or paired-end
bwa aln -t 2 $reference $reads_1 > bam/aln_sa1_${name}_male.sai & 2>>errlogfile;
wait;

if [[ $fragments =~ (single) ]]; then
    #single end reads
    bwa samse  $reference bam/aln_sa1_${name}_male.sai $reads_1 > bam/aln_${name}_male.sam 2>>errlogfile;
    echo "bwa samse  $reference bam/aln_sa1_${name}_male.sai $reads_1 > bam/aln_${name}_male.sam 2>>errlogfile";
else
    #pair-end reads
    bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_${name}_male.sai & 2>>errlogfile;
    echo "bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_${name}_male.sai & 2>>errlogfile";
    wait;
    bwa sampe $reference bam/aln_sa1_${name}_male.sai bam/aln_sa2_${name}_male.sai $reads_1 $reads_2 > bam/aln_${name}_male.sam 2>>errlogfile;
    echo "bwa sampe $reference bam/aln_sa1_${name}_male.sai bam/aln_sa2_${name}_male.sai $reads_1 $reads_2 > bam/aln_${name}_male.sam 2>>errlogfile";
fi

wait;
rm -f bam/aln_sa1_${name}_male.sai bam/aln_sa2_${name}_male.sai 


echo "1/6 MAPPING WITH BWA successful";
echo "seconds of run: $SECONDS";

#2 CLEANING FILES - adjust MAPQ scores
echo "$SECONDS";
java -jar $CleanSam I=bam/aln_${name}_male.sam O=bam/aln_cleaned_${name}_male.sam 2>>/dev/null;
echo "2/6 CLEANING FILES - adjust MAPQ scores successful";
echo "seconds of run: $SECONDS";
wait

#3 CONVERTING TO BAM FILE
samtools view -bS bam/aln_cleaned_${name}_male.sam > bam/aln_cleaned_${name}_male.bam 2>>errlogfile;
echo "3/6 CONVERTING TO BAM FILE successful";
echo "seconds of run: $SECONDS";
wait

#4 SORTING BAM FILE
samtools sort bam/aln_cleaned_${name}_male.bam bam/aln_cleaned_sorted_${name}_male >mlogfile 2>>errlogfile;
echo "4/6 SORTING BAM FILE successful";
echo "seconds of run: $SECONDS";
wait;

#5 REMOVING DUPLICATES
java -jar $MarkDuplicates INPUT=bam/aln_cleaned_sorted_${name}_male.bam OUTPUT=bam/aln_cleaned_sorted_deduplicated_${name}_male.bam METRICS_FILE=picard_info.txt REMOVE_DUPLICATES=true ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 2>>/dev/null;
echo "5/6 REMOVING DUPLICATES successful";
echo "seconds of run: $SECONDS";
wait;

#9 REMOVING FILES
rm -f bam/aln_${name}_male.sam bam/aln_cleaned_${name}_male.sam bam/aln_cleaned_${name}_male.bam bam/aln_cleaned_sorted_${name}_male.bam >>mlogfile 2>>errlogfile;
echo "6/6 REMOVING FILES successful";
echo "seconds of run: $SECONDS";
echo "DONE";