0
|
1 #!/bin/bash
|
|
2
|
|
3 #LINKYX_PATH set during toolshed installation
|
|
4
|
|
5 results=$1 #X-linked candidates written here
|
|
6
|
|
7 reference=${2}
|
|
8 mother_male_bam=${3}
|
|
9 father_male_bam=${4}
|
|
10 daughter_male_bam=${5}
|
|
11 son_male_bam=${6}
|
|
12
|
|
13 results_fasta=${7}
|
|
14
|
|
15 mkdir -p Y references; dir="Y/";
|
|
16 grep ">" $reference | sed 's/>//g' >references/male_contig_names
|
|
17
|
|
18 file_exists ()
|
|
19 {
|
|
20 for ARG in "$@";
|
|
21 do
|
|
22 if [ ! -f ${ARG} ]; then
|
|
23 echo "File $ARG not found";
|
|
24 fi
|
|
25 done;
|
|
26 }
|
|
27
|
|
28 #index bam files
|
|
29
|
|
30 index_bam_file ()
|
|
31 {
|
|
32 for ARG in "$@";
|
|
33 do
|
|
34 bam_file_index=${ARG}".bai"
|
|
35 if [ -e $bam_file_index ];
|
|
36 then
|
|
37 echo "Index already exists. Not being created again.";
|
|
38 else
|
|
39 samtools index ${ARG};
|
|
40 fi
|
|
41 done;
|
|
42 }
|
|
43
|
|
44 #get statistics about mapped and unmapped reads for every contig
|
|
45
|
|
46 idxstats_bam_file ()
|
|
47 {
|
|
48 #get statistics about mapped and unmapped reads for every contig
|
|
49 echo "Idxstats" $1 $2;
|
|
50 stat_male_name=statistics/stat_male_${2};
|
|
51 samtools idxstats $1 >$stat_male_name #statistics for bam files mapping on male reference
|
|
52
|
|
53 #with MAPQ>0
|
|
54 samtools view -bq 1 $1 >statistics/high_score_male_${2}.bam
|
|
55 samtools index statistics/high_score_male_${2}.bam
|
|
56
|
|
57 stat_male_high_name=statistics/stat_male_${2}_high
|
|
58 samtools idxstats statistics/high_score_male_${2}.bam >$stat_male_high_name #statistics for bam files mapping on male reference with MAPQ>0
|
|
59 }
|
|
60
|
|
61 echo "Checking existence of input bam files."
|
|
62 file_exists $mother_male_bam $father_male_bam $son_male_bam $daughter_male_bam
|
|
63 index_bam_file $mother_male_bam $father_male_bam $son_male_bam $daughter_male_bam
|
|
64
|
|
65 #retrieve statistics
|
|
66 mkdir -p statistics;
|
|
67 idxstats_bam_file $mother_male_bam mother
|
|
68 idxstats_bam_file $father_male_bam father
|
|
69 idxstats_bam_file $daughter_male_bam daughter
|
|
70 idxstats_bam_file $son_male_bam son
|
|
71
|
|
72 #read statistics
|
|
73 reads_number_mother=`awk '{read_number+=($3+$4)} END {print read_number}' statistics/stat_male_mother`;
|
|
74 reads_number_father=`awk '{read_number+=($3+$4)} END {print read_number}' statistics/stat_male_father`;
|
|
75 reads_number_daughter=`awk '{read_number+=($3+$4)} END {print read_number}' statistics/stat_male_daughter`;
|
|
76 reads_number_son=`awk '{read_number+=($3+$4)} END {print read_number}' statistics/stat_male_son`;
|
|
77
|
|
78 echo "Reads numbers: " $reads_number_mother $reads_number_father $reads_number_daughter $reads_number_son
|
|
79
|
|
80 #filter contigs, which are candidate Y genes - very divergent from their X counterparts
|
|
81 perl ${LINKYX_PATH}/scripts/filter_diverged_Y_contigs.pl $reads_number_mother $reads_number_father $reads_number_daughter $reads_number_son $reference ${LINKYX_PATH} >Y_results.txt;
|
|
82
|
|
83 echo "Y_results: " >>$results
|
|
84
|
|
85 final_contigs=`cat Y_list | wc -l`
|
|
86 #have we found any genes?
|
|
87
|
|
88 if [ $final_contigs -gt 0 ]; then
|
|
89 #we found SOME genes
|
|
90 #retrieve sequences in fasta
|
|
91 perl ${LINKYX_PATH}/scripts/get_sequences_based_on_ids.pl $reference Y_list >Y_results_sequences.fasta;
|
|
92
|
|
93 get_results_in_bam_file ()
|
|
94 {
|
|
95 #candidates full_bam_file candidates_bam_file
|
|
96 cat Y_list | xargs samtools view -F 4 -b $1 > $2;
|
|
97 samtools index $2
|
|
98 }
|
|
99
|
|
100 get_results_in_bam_file $mother_male_bam ${8} #argument for output bam file mother
|
|
101 get_results_in_bam_file $father_male_bam ${9} #argument for output bam file father
|
|
102 get_results_in_bam_file $daughter_male_bam ${10} #argument for output bam file daughter
|
|
103 get_results_in_bam_file $son_male_bam ${11} #argument for output bam file son
|
|
104 wait;
|
|
105 else
|
|
106 #we found NO genes
|
|
107 echo "No sequences found." >Y_results.txt;
|
|
108 echo "No sequences found." >Y_results_sequences.fasta;
|
|
109 fi
|
|
110
|
|
111 #output results to Galaxy
|
|
112 cat Y_results.txt >>$results
|
|
113 cat Y_results_sequences.fasta >>$results_fasta
|
|
114 ls -lah */* >>$results
|