Mercurial > repos > biomonika > linkyx
comparison LINKYX_identify_Y_linked_SNPs.sh @ 0:05c27700e5ca
initial commit
author | biomonika <biomonika@psu.edu> |
---|---|
date | Thu, 04 Sep 2014 18:24:19 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:05c27700e5ca |
---|---|
1 #!/bin/bash | |
2 | |
3 #LINKYX_PATH set during toolshed installation | |
4 | |
5 results=$1 #X-linked candidates written here | |
6 | |
7 reference=${2} | |
8 mother_male_bam=${3} | |
9 father_male_bam=${4} | |
10 daughter_male_bam=${5} | |
11 son_male_bam=${6} | |
12 | |
13 results_fasta=${7} | |
14 | |
15 mkdir -p Y references; dir="Y/"; | |
16 grep ">" $reference | sed 's/>//g' >references/male_contig_names | |
17 | |
18 file_exists () | |
19 { | |
20 for ARG in "$@"; | |
21 do | |
22 if [ ! -f ${ARG} ]; then | |
23 echo "File $ARG not found"; | |
24 fi | |
25 done; | |
26 } | |
27 | |
28 #index bam files | |
29 | |
30 index_bam_file () | |
31 { | |
32 for ARG in "$@"; | |
33 do | |
34 bam_file_index=${ARG}".bai" | |
35 if [ -e $bam_file_index ]; | |
36 then | |
37 echo "Index already exists. Not being created again."; | |
38 else | |
39 samtools index ${ARG}; | |
40 fi | |
41 done; | |
42 } | |
43 | |
44 #get statistics about mapped and unmapped reads for every contig | |
45 | |
46 idxstats_bam_file () | |
47 { | |
48 #get statistics about mapped and unmapped reads for every contig | |
49 echo "Idxstats" $1 $2; | |
50 stat_male_name=statistics/stat_male_${2}; | |
51 samtools idxstats $1 >$stat_male_name #statistics for bam files mapping on male reference | |
52 | |
53 #with MAPQ>0 | |
54 samtools view -bq 1 $1 >statistics/high_score_male_${2}.bam | |
55 samtools index statistics/high_score_male_${2}.bam | |
56 | |
57 stat_male_high_name=statistics/stat_male_${2}_high | |
58 samtools idxstats statistics/high_score_male_${2}.bam >$stat_male_high_name #statistics for bam files mapping on male reference with MAPQ>0 | |
59 } | |
60 | |
61 echo "Checking existence of input bam files." | |
62 file_exists $mother_male_bam $father_male_bam $son_male_bam $daughter_male_bam | |
63 index_bam_file $mother_male_bam $father_male_bam $son_male_bam $daughter_male_bam | |
64 | |
65 #retrieve statistics | |
66 mkdir -p statistics; | |
67 idxstats_bam_file $mother_male_bam mother | |
68 idxstats_bam_file $father_male_bam father | |
69 idxstats_bam_file $daughter_male_bam daughter | |
70 idxstats_bam_file $son_male_bam son | |
71 | |
72 #read statistics | |
73 reads_number_mother=`awk '{read_number+=($3+$4)} END {print read_number}' statistics/stat_male_mother`; | |
74 reads_number_father=`awk '{read_number+=($3+$4)} END {print read_number}' statistics/stat_male_father`; | |
75 reads_number_daughter=`awk '{read_number+=($3+$4)} END {print read_number}' statistics/stat_male_daughter`; | |
76 reads_number_son=`awk '{read_number+=($3+$4)} END {print read_number}' statistics/stat_male_son`; | |
77 | |
78 echo "Reads numbers: " $reads_number_mother $reads_number_father $reads_number_daughter $reads_number_son | |
79 | |
80 #filter contigs, which are candidate Y genes - very divergent from their X counterparts | |
81 perl ${LINKYX_PATH}/scripts/filter_diverged_Y_contigs.pl $reads_number_mother $reads_number_father $reads_number_daughter $reads_number_son $reference ${LINKYX_PATH} >Y_results.txt; | |
82 | |
83 echo "Y_results: " >>$results | |
84 | |
85 final_contigs=`cat Y_list | wc -l` | |
86 #have we found any genes? | |
87 | |
88 if [ $final_contigs -gt 0 ]; then | |
89 #we found SOME genes | |
90 #retrieve sequences in fasta | |
91 perl ${LINKYX_PATH}/scripts/get_sequences_based_on_ids.pl $reference Y_list >Y_results_sequences.fasta; | |
92 | |
93 get_results_in_bam_file () | |
94 { | |
95 #candidates full_bam_file candidates_bam_file | |
96 cat Y_list | xargs samtools view -F 4 -b $1 > $2; | |
97 samtools index $2 | |
98 } | |
99 | |
100 get_results_in_bam_file $mother_male_bam ${8} #argument for output bam file mother | |
101 get_results_in_bam_file $father_male_bam ${9} #argument for output bam file father | |
102 get_results_in_bam_file $daughter_male_bam ${10} #argument for output bam file daughter | |
103 get_results_in_bam_file $son_male_bam ${11} #argument for output bam file son | |
104 wait; | |
105 else | |
106 #we found NO genes | |
107 echo "No sequences found." >Y_results.txt; | |
108 echo "No sequences found." >Y_results_sequences.fasta; | |
109 fi | |
110 | |
111 #output results to Galaxy | |
112 cat Y_results.txt >>$results | |
113 cat Y_results_sequences.fasta >>$results_fasta | |
114 ls -lah */* >>$results |