annotate smap_haplotype.xml @ 0:7d416d98d2c9 draft default tip

Uploaded
author ieguinoa
date Tue, 22 Mar 2022 13:49:39 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
1 <tool id="smap_haplotype" name="SMAP haplotype-sites (BETA)" version="4.5.0">
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
2 <requirements>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
3 <requirement type="package">ngs-smap</requirement>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
4 </requirements>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
5 <command detect_errors="exit_code"><![CDATA[
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
6 ### haplotype sites
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
7 ## create output dir
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
8 #set $out_dir='haplotype_sites_output';
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
9 mkdir $out_dir;
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
10 #set $out_base='hs_base'
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
11 ## create input dir for bam and bai files
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
12 mkdir alignments_dir;
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
13 #set $input_list = []
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
14 #import re
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
15 #for $i, $input in enumerate($input_bams):
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
16 #set $safename = re.sub('[^\w\-_]', '_', $input.element_identifier)
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
17 #if $safename in $input_list:
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
18 #set $safename = str($safename) + "." + str($i)
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
19 #end if
## Remember every link name handed out so far. Without this the list stays
## empty, the duplicate check above never fires, and two inputs whose
## identifiers sanitize to the same string overwrite each other's symlinks.
#silent $input_list.append($safename)
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
20 ln -sf '${input}' 'alignments_dir/${safename}.bam' &&
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
21 ln -sf '${input.metadata.bam_index}' 'alignments_dir/${safename}.bai' &&
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
22 #end for
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
23 smap haplotype-sites alignments_dir $bed_sites_file $vcf_file
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
24 -mapping_orientation $mapping_orientation
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
25 -partial $partial
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
26 --min_read_count $min_read_count
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
27 $no_indels
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
28 --discrete_calls $discrete_calls
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
29 --min_haplotype_frequency $min_haplotype_frequency
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
30 -p "\${GALAXY_SLOTS:-1}"
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
31 --min_distinct_haplotypes $min_distinct_haplotypes
## Forward the remaining inputs declared in the inputs section. They were
## exposed in the form but never placed on the command line, so the values
## chosen by the user had no effect. Free-text inputs are only passed when
## non-empty so that smap falls back to its own default otherwise.
#if str($max_distinct_haplotypes).strip():
--max_distinct_haplotypes '$max_distinct_haplotypes'
#end if
#if str($max_read_count).strip():
--max_read_count '$max_read_count'
#end if
--min_mapping_quality $min_mapping_quality
--mask_frequency $mask_frequency
#if str($undefined_representation).strip():
--undefined_representation '$undefined_representation'
#end if
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
32 --plot_type png
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
33 --plot $plot
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
34 #if $frequency_interval_bounds:
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
35 --frequency_interval_bounds '$frequency_interval_bounds'
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
36 #end if
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
37 #if $dosage_filter:
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
38 --dosage_filter $dosage_filter
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
39 #end if
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
40 #if $locus_correctness:
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
41 --locus_correctness $locus_correctness
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
42 #end if
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
43 -o $out_dir/$out_base
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
44 ;
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
45 ## mv main outputs to corresponding file destination
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
46 mv $out_dir/$out_base\_coordinates.tsv $coordinates;
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
47 mv $out_dir/$out_base\_read_counts* $read_counts;
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
48 #set $barplot_dir='barplots_out'
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
49 mkdir $barplot_dir;
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
50 #if $plot != 'nothing':
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
51 mv $out_dir/$out_base\_haplotype*.barplot.png $barplot_dir;
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
52 #end if
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
53 #set $haplotype_out_dir='haplotype_tsv_dir';
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
54 mkdir $haplotype_out_dir;
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
55 mv $out_dir/$out_base\_haplotype* $haplotype_out_dir/;
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
56 ## when using --plot all there is 1 plot per bam file named **..frequency.histogram.png
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
57 #if $plot == 'all':
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
58 ## I should use discover outputs, but can simplify it if I move them first to a specific dir and discover all png from there
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
59 mkdir frequency_plots;
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
60 mv $out_dir/*.frequency.histogram.png frequency_plots;
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
61 #end if
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
62 ]]></command>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
63 <inputs>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
64 <param name="input_bams" type="data" optional="false" label="Select your alignments files" format="bam" multiple="true"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
65 <param argument="--mapping_orientation" type="select" label="Should strandedness of read mapping be considered for haplotyping?">
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
66 <option value="ignore" selected="True">Ignore strandedness</option>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
67 <option value="stranded">Consider strandedness</option>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
68 </param>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
69 <param argument="--partial" type="select" label="Select if partial alignments should be excluded" help="Specify if reads are expected to be aligned at both outer positions of the locus (HiPlex, Shotgun SNPs in sliding frames) or if reads are expected to display read mapping polymorphisms along the locus (GBS, Shotgun SVs).">
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
70 <option value="exclude" selected="True">Partially mapped reads are excluded</option>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
71 <option value="include">Include reads that only partially cover the locus</option>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
72 </param>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
73 <param name="bed_sites_file" type="data" optional="false" label="BED File" help="BED file" format="bed" multiple="false"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
74 <param name="vcf_file" type="data" optional="false" label="Variants positions File" format="vcf" multiple="false" help="Should be in VCFv4.2 format, containing variant positions. It should contain at least the first 9 columns listing the SNP positions, sample-specific genotype calls across the sampleset are not required. Positional mandatory argument, should be the third argument after smap haplotype-sites."/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
75 <param argument="--plot" type="select" label="Select which plots are to be generated.">
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
76 <option value="summary" selected="True">Summary (plots with information for all samples)</option>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
77 <option value="all">All (per sample plots)</option>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
78 <option value="nothing">Nothing</option>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
79 </param>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
80 <param argument="--undefined_representation" type="text" label="Value to use for non-existing or masked data" value="NaN"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
81 <param argument="--min_mapping_quality" type="integer" value="30" label="Minimum .bam mapping quality to retain reads for analysis"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
82 <param argument="--no_indels" type="boolean" truevalue="--no_indels" falsevalue="" checked="false" label="Select true in this option if you want to exclude haplotypes that contain an InDel at the given SNP/SMAP positions." help="These reads are also ignored to evaluate the minimal read count"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
83 <param argument="--min_distinct_haplotypes" type="integer" value="0" label="Minimal number of distinct haplotypes per locus across all samples" help="Loci that do not fit this criterion are removed from the final output"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
84 <param argument="--max_distinct_haplotypes" type="text" value="inf" label="Maximal number of distinct haplotypes per locus across all samples. Loci that do not fit this criterion are removed from the final output"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
85 <param argument="--min_read_count" type="integer" value="0" label="Minimal total number of reads per locus per sample"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
86 <param argument="--max_read_count" type="text" value="inf" label="Maximal number of reads per locus per sample, read count is calculated after filtering out the low frequency haplotypes"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
87 <param argument="--min_haplotype_frequency" type="float" value="0" label="Set minimal HF (in percentage) to retain the haplotype in the genotyping matrix" help="Haplotypes above this threshold in at least one of the samples are retained. Haplotypes that never reach this threshold in any of the samples are removed"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
88
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
89 <param argument="--mask_frequency" type="float" value="0" label="Mask haplotype frequency values below this threshold for individual samples to remove noise from the final output" help="Haplotype frequency values below this threshold are set to -u. Haplotypes are not removed based on this value, use --min_haplotype_frequency for this purpose instead."/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
90 <param argument="--discrete_calls" type="select" label="Discrete calls" help="Select dominant to transform haplotype frequency values into presence(1)/absence(0) calls per allele, or dosage to indicate the allele copy number">
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
91 <option value="dominant" selected="True">Dominant</option>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
92 <option value="dosage">Dosage</option>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
93 </param>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
94 <param argument="--frequency_interval_bounds" type="text" optional="True" label="Frequency interval bounds for classifying the read frequencies into discrete calls" help="Custom thresholds can be defined by passing one or more space-separated integers or floats which represent relative frequencies in percentage. For dominant calling, one value should be specified. For dosage calling, an even total number of four or more thresholds should be specified"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
95 <param argument="--dosage_filter" type="integer" optional="True" label="Mask dosage calls in the loci for which the total allele count for a given locus at a given sample differs from the defined value" help="For example, in diploid organisms the total allele copy number must be 2, and in tetraploids the total allele copy number must be 4. (default no filtering)."/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
96 <param argument="--locus_correctness" type="integer" optional="True" label="Threshold value: % of samples with locus correctness." help="Create a new .bed file defining only the loci that were correctly dosage called in at least the defined percentage of samples (default no filtering)"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
97 </inputs>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
98 <outputs>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
99 <collection name="haplotype_frequencies" type="list" label="Haplotypes" >
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
100 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tsv" ext="tsv" directory="haplotype_tsv_dir/"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
101 </collection>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
102 <data format="tsv" name="coordinates" label="Coordinates" />
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
103 <data format="tsv" name="read_counts" label="Read counts" />
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
104 <collection name="barplots" type="list" label="${tool.name} on ${on_string}: Barplots">
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
105 <filter>plot == "all" or plot == "summary"</filter>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
106 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.png" ext="png" directory="barplots_out/"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
107 </collection>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
108 <collection name="frequencies_histograms" type="list" label="${tool.name} on ${on_string}: Frequencies histograms">
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
109 <filter>plot == "all"</filter>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
110 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.png" ext="png" directory="frequency_plots/"/>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
111 </collection>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
112 </outputs>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
113 <help><![CDATA[
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
114 TODO: Fill in help.
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
115 ]]></help>
7d416d98d2c9 Uploaded
ieguinoa
parents:
diff changeset
116 </tool>