Mercurial > repos > greg > vsnp_determine_ref_from_data
comparison vsnp_determine_ref_from_data.xml @ 0:ebc08e5ce646 draft
Uploaded
author | greg |
---|---|
date | Tue, 21 Apr 2020 10:08:28 -0400 |
parents | |
children | bca267738b33 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ebc08e5ce646 |
---|---|
1 <tool id="vsnp_determine_ref_from_data" name="vSNP: determine reference" version="1.0.0"> | |
2 <description>from input data</description> | |
3 <requirements> | |
4 <requirement type="package" version="1.76">biopython</requirement> | |
5 <requirement type="package" version="5.3">pyyaml</requirement> | |
6 </requirements> | |
7 <command detect_errors="exit_code"><![CDATA[ | |
8 #import os | |
9 #import re | |
10 #set $dnaprint_fields = $__app__.tool_data_tables['vsnp_dnaprints'].get_fields() | |
11 #set gzipped = 'false' | |
12 #set input_type = $input_type_cond.input_type | |
13 #set input_reads_dir = 'input_reads' | |
14 #set output_dbkey_dir = 'output_dbkey' | |
15 #set output_metrics_dir = 'output_metrics' | |
16 mkdir -p $input_reads_dir && | |
17 mkdir -p $output_dbkey_dir && | |
18 mkdir -p $output_metrics_dir && | |
19 #if str($input_type) == "single": | |
20 #set read_type_cond = $input_type_cond.read_type_cond | |
21 #set read1 = $read_type_cond.read1 | |
22 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) | |
23 #if str($read_type_cond.read_type) == "single": | |
24 ln -s '${read1}' '${read1_identifier}' && | |
25 #if $read1.is_of_type('fastqsanger.gz'): | |
26 #set gzipped = 'true' | |
27 #end if | |
28 #else: | |
29 #set read2 = $read_type_cond.read2 | |
30 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier)) | |
31 ln -s '${read1}' '${read1_identifier}' && | |
32 ln -s '${read2}' '${read2_identifier}' && | |
33 #if $read1.is_of_type('fastqsanger.gz') and $read2.is_of_type('fastqsanger.gz'): | |
34 #set gzipped = 'true' | |
35 #end if | |
36 #end if | |
37 #else: | |
38 #for $i in $input_type_cond.reads_collection: | |
39 #if $i.is_of_type('fastqsanger.gz'): | |
40 #set gzipped = 'true' | |
41 #end if | |
42 #set filename = $i.file_name | |
43 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier)) | |
44 ln -s '${filename}' '${input_reads_dir}/${identifier}' && ## quoted because the identifier regex above keeps whitespace | |
45 #end for | |
46 #end if | |
47 python '$__tool_directory__/vsnp_determine_ref_from_data.py' | |
48 #if str($input_type) == "single": | |
49 #if str($read_type_cond.read_type) == "single": | |
50 --read1 '${read1_identifier}' | |
51 #else: | |
52 --read1 '${read1_identifier}' | |
53 --read2 '${read2_identifier}' | |
54 #end if | |
55 --output_dbkey '$output_dbkey' | |
56 --output_metrics '$output_metrics' | |
57 #end if | |
58 --gzipped $gzipped | |
59 --processes $processes | |
60 #for $i in $dnaprint_fields: | |
61 --dnaprint_fields '${i[0]}' '${i[2]}' | |
62 #end for | |
63 ]]></command> | |
64 <inputs> | |
65 <conditional name="input_type_cond"> | |
66 <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> | |
67 <option value="single" selected="true">Single files</option> | |
68 <option value="collection">Collections of files</option> | |
69 </param> | |
70 <when value="single"> | |
71 <conditional name="read_type_cond"> | |
72 <param name="read_type" type="select" label="Choose the read type"> | |
73 <option value="paired" selected="true">Paired</option> | |
74 <option value="single">Single</option> | |
75 </param> | |
76 <when value="paired"> | |
77 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> | |
78 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/> | |
79 </when> | |
80 <when value="single"> | |
81 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> | |
82 </when> | |
83 </conditional> | |
84 </when> | |
85 <when value="collection"> | |
86 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of fastqsanger files"/> | |
87 </when> | |
88 </conditional> | |
89 <param name="processes" type="integer" min="1" max="20" value="8" label="Number of processes for job splitting"/> | |
90 </inputs> | |
91 <outputs> | |
92 <data name="output_dbkey" format="txt" label="${tool.name} (dbkey) on ${on_string}"> | |
93 <filter>input_type_cond['input_type'] == 'single'</filter> | |
94 </data> | |
95 <data name="output_metrics" format="txt" label="${tool.name} (metrics) on ${on_string}"> | |
96 <filter>input_type_cond['input_type'] == 'single'</filter> | |
97 </data> | |
98 <collection name="output_dbkey_collection" type="list"> | |
99 <discover_datasets pattern="__name__" directory="output_dbkey" format="txt" /> | |
100 <filter>input_type_cond['input_type'] == 'collection'</filter> | |
101 </collection> | |
102 <collection name="output_metrics_collection" type="list"> | |
103 <discover_datasets pattern="__name__" directory="output_metrics" format="txt" /> | |
104 <filter>input_type_cond['input_type'] == 'collection'</filter> | |
105 </collection> | |
106 </outputs> | |
107 <tests> | |
108 <test> | |
109 <!-- Need to figure out how to test installed data tables --> | |
110 <param name="read1" value="reads.fastqsanger" ftype="fastqsanger" dbkey="89"/> | |
111 <param name="read2" value="read2.fastqsanger" ftype="fastqsanger" dbkey="89"/> | |
112 <output name="output_dbkey" file="output_dbkey.txt" ftype="txt"/> | |
113 <output name="output_metrics" file="output_metrics.txt" ftype="txt"/> | |
114 </test> | |
115 </tests> | |
116 <help> | |
117 **What it does** | |
118 | |
119 Accepts a single fastqsanger read, a set of paired reads, or a collection of reads and inspects the data to discover the | |
120 best reference genome for aligning the reads. This tool is, in essence, a DNA sniffer, and is the first Galaxy tool to | |
121 perform this task. While inspecting the data, a string of 0's and 1's is compiled based on the data contents, and we call | |
122 the complete string a "DNA print". All of the "DNA prints" files installed by the complementary **vSNP DNAprints data | |
123 manager** tool are then inspected to find a match for the compiled "DNA print" string. These files are each associated | |
124 with a Galaxy "dbkey" (i.e., genome build), so when a match is found, the associated "dbkey" is passed to a mapper (e.g., | |
125 **Map with BWA-MEM**) to align the reads to the associated reference. | |
126 | |
127 The tool produces 2 text files, a "dbkey" file that contains the dbkey string and a "metrics" file that provides information | |
128 used to compile the "DNA print" string. | |
129 | |
130 This tool is important for samples containing bacterial species because many of the samples have a "mixed bag" of species, | |
131 and discovering the primary species is critical. DNA print matching is currently supported for the following genomes. | |
132 | |
133 * Mycobacterium bovis AF2122/97 | |
134 * Brucella abortus bv. 1 str. 9-941 | |
135 * Brucella abortus strain BER | |
136 * Brucella canis ATCC 23365 | |
137 * Brucella ceti TE10759-12 | |
138 * Brucella melitensis bv. 1 str. 16M | |
139 * Brucella melitensis bv. 3 str. Ether | |
140 * Brucella melitensis BwIM_SOM_36b | |
141 * Brucella melitensis ATCC 23457 | |
142 * Brucella ovis ATCC 25840 | |
143 * Brucella suis 1330 | |
144 * Mycobacterium tuberculosis H37Rv | |
145 * Mycobacterium avium subsp. paratuberculosis strain Telford | |
146 * Mycobacterium avium subsp. paratuberculosis K-10 | |
147 * Brucella suis ATCC 23445 | |
148 * Brucella suis bv. 3 str. 686 | |
149 | |
150 **Required Options** | |
151 | |
152 * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single or paired fastqsanger reads or collections of fastqsanger reads) based on the selected option. | |
153 * **Number of processes for job splitting** - Select the number of processes for splitting the job to shorten execution time. | |
154 </help> | |
155 <citations> | |
156 <citation type="bibtex"> | |
157 @misc{None, | |
158 journal = {None}, | |
159 author = {1. Stuber T}, | |
160 title = {Manuscript in preparation}, | |
161 year = {None}, | |
162 url = {https://github.com/USDA-VS/vSNP},} | |
163 </citation> | |
164 </citations> | |
165 </tool> | |
166 |