annotate microrunqc.xml @ 21:5083f8406e34 draft

Uploaded
author estrain
date Fri, 19 Jan 2024 11:47:29 +0000
parents b07d5fec5942
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
16
b07d5fec5942 Uploaded
estrain
parents: 15
diff changeset
1 <tool id="microrunqc" name="microrunqc" version="1.0.2">
2
25a92dfb780a Uploaded
estrain
parents:
diff changeset
2
25a92dfb780a Uploaded
estrain
parents:
diff changeset
3 <requirements>
11
5eb0f25cf5d3 Uploaded
estrain
parents: 10
diff changeset
4 <requirement type="package" version="2.4.0">skesa</requirement>
5eb0f25cf5d3 Uploaded
estrain
parents: 10
diff changeset
5 <requirement type="package" version="2.23.0">mlst</requirement>
5eb0f25cf5d3 Uploaded
estrain
parents: 10
diff changeset
6 <requirement type="package" version="0.7.17">bwa</requirement>
5eb0f25cf5d3 Uploaded
estrain
parents: 10
diff changeset
7 <requirement type="package" version="1.0.1">fastq-scan</requirement>
2
25a92dfb780a Uploaded
estrain
parents:
diff changeset
8 </requirements>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
9
25a92dfb780a Uploaded
estrain
parents:
diff changeset
10 <command detect_errors="exit_code"><![CDATA[
25a92dfb780a Uploaded
estrain
parents:
diff changeset
11
25a92dfb780a Uploaded
estrain
parents:
diff changeset
12 skesa
25a92dfb780a Uploaded
estrain
parents:
diff changeset
13
25a92dfb780a Uploaded
estrain
parents:
diff changeset
14 #set fqscan = "text"
25a92dfb780a Uploaded
estrain
parents:
diff changeset
15 #if $jobtype.select == "fastq_fr"
25a92dfb780a Uploaded
estrain
parents:
diff changeset
16 #set outname = $jobtype.fastq1.name
25a92dfb780a Uploaded
estrain
parents:
diff changeset
17 #set bwalist = str($jobtype.fastq1) + " " + str($jobtype.fastq2)
25a92dfb780a Uploaded
estrain
parents:
diff changeset
18 --fastq $jobtype.fastq1,$jobtype.fastq2
25a92dfb780a Uploaded
estrain
parents:
diff changeset
19 #if $jobtype.fastq1.is_of_type("fastq.gz")
25a92dfb780a Uploaded
estrain
parents:
diff changeset
20 #set fqscan = "gz"
25a92dfb780a Uploaded
estrain
parents:
diff changeset
21 #else if $jobtype.fastq1.is_of_type("fastqsanger.gz")
25a92dfb780a Uploaded
estrain
parents:
diff changeset
22 #set fqscan = "gz"
25a92dfb780a Uploaded
estrain
parents:
diff changeset
23 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
24 #else if $jobtype.select == "fastq_pair"
25a92dfb780a Uploaded
estrain
parents:
diff changeset
25 #set outname = $jobtype.coll.name
25a92dfb780a Uploaded
estrain
parents:
diff changeset
26 #set bwalist = str($jobtype.coll.forward) + " " + str($jobtype.coll.reverse)
25a92dfb780a Uploaded
estrain
parents:
diff changeset
27 --fastq $jobtype.coll.forward,$jobtype.coll.reverse
25a92dfb780a Uploaded
estrain
parents:
diff changeset
28 #if $jobtype.coll.forward.is_of_type("fastq.gz")
25a92dfb780a Uploaded
estrain
parents:
diff changeset
29 #set fqscan = "gz"
25a92dfb780a Uploaded
estrain
parents:
diff changeset
30 #else if $jobtype.coll.forward.is_of_type("fastqsanger.gz")
25a92dfb780a Uploaded
estrain
parents:
diff changeset
31 #set fqscan = "gz"
25a92dfb780a Uploaded
estrain
parents:
diff changeset
32 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
33 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
34
25a92dfb780a Uploaded
estrain
parents:
diff changeset
35 #set num_cores = 1
25a92dfb780a Uploaded
estrain
parents:
diff changeset
36
25a92dfb780a Uploaded
estrain
parents:
diff changeset
37 #if $options.select =="basic"
25a92dfb780a Uploaded
estrain
parents:
diff changeset
38 --cores $num_cores
25a92dfb780a Uploaded
estrain
parents:
diff changeset
39 --memory 8
25a92dfb780a Uploaded
estrain
parents:
diff changeset
40 #else if $options.select=="advanced"
25a92dfb780a Uploaded
estrain
parents:
diff changeset
41 #if $options.cores
25a92dfb780a Uploaded
estrain
parents:
diff changeset
42 #set num_cores = $options.cores
25a92dfb780a Uploaded
estrain
parents:
diff changeset
43 --cores $options.cores
25a92dfb780a Uploaded
estrain
parents:
diff changeset
44 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
45 #if $options.memory
25a92dfb780a Uploaded
estrain
parents:
diff changeset
46 --memory $options.memory
25a92dfb780a Uploaded
estrain
parents:
diff changeset
47 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
48 #if $options.hash_count
25a92dfb780a Uploaded
estrain
parents:
diff changeset
49 --hash_count
25a92dfb780a Uploaded
estrain
parents:
diff changeset
50 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
51 #if $options.estimated_kmers
25a92dfb780a Uploaded
estrain
parents:
diff changeset
52 --estimated_kmers $options.estimated_kmers
25a92dfb780a Uploaded
estrain
parents:
diff changeset
53 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
54 #if $options.skip
25a92dfb780a Uploaded
estrain
parents:
diff changeset
55 --skip_bloom_filter
25a92dfb780a Uploaded
estrain
parents:
diff changeset
56 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
57 #if $options.kmer
25a92dfb780a Uploaded
estrain
parents:
diff changeset
58 --kmer $options.kmer
25a92dfb780a Uploaded
estrain
parents:
diff changeset
59 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
60 #if $options.min_count
25a92dfb780a Uploaded
estrain
parents:
diff changeset
61 --min_count $options.min_count
25a92dfb780a Uploaded
estrain
parents:
diff changeset
62 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
63 #if $options.max_kmer_count
25a92dfb780a Uploaded
estrain
parents:
diff changeset
64 --max_kmer_count $options.max_kmer_count
25a92dfb780a Uploaded
estrain
parents:
diff changeset
65 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
66 #if $options.vector_percent
25a92dfb780a Uploaded
estrain
parents:
diff changeset
67 --vector_percent $options.vector_percent
25a92dfb780a Uploaded
estrain
parents:
diff changeset
68 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
69 #if $options.insert_size
25a92dfb780a Uploaded
estrain
parents:
diff changeset
70 --insert_size $options.insert_size
25a92dfb780a Uploaded
estrain
parents:
diff changeset
71 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
72 #if $options.steps
25a92dfb780a Uploaded
estrain
parents:
diff changeset
73 --steps $options.steps
25a92dfb780a Uploaded
estrain
parents:
diff changeset
74 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
75 #if $options.fraction
25a92dfb780a Uploaded
estrain
parents:
diff changeset
76 --fraction $options.fraction
25a92dfb780a Uploaded
estrain
parents:
diff changeset
77 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
78 #if $options.max_snp_len
25a92dfb780a Uploaded
estrain
parents:
diff changeset
79 --max_snp_len $options.max_snp_len
25a92dfb780a Uploaded
estrain
parents:
diff changeset
80 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
81 #if $options.min_contig
25a92dfb780a Uploaded
estrain
parents:
diff changeset
82 --min_contig $options.min_contig
25a92dfb780a Uploaded
estrain
parents:
diff changeset
83 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
84 #if $options.allow_snps
25a92dfb780a Uploaded
estrain
parents:
diff changeset
85 --allow_snps
25a92dfb780a Uploaded
estrain
parents:
diff changeset
86 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
87 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
88
25a92dfb780a Uploaded
estrain
parents:
diff changeset
89 > ${outname}.fasta;
25a92dfb780a Uploaded
estrain
parents:
diff changeset
90
25a92dfb780a Uploaded
estrain
parents:
diff changeset
91 bwa index ${outname}.fasta;
25a92dfb780a Uploaded
estrain
parents:
diff changeset
92 bwa mem -t $num_cores ${outname}.fasta ${bwalist} | python $__tool_directory__/median_size.py > insert.median;
25a92dfb780a Uploaded
estrain
parents:
diff changeset
93
16
b07d5fec5942 Uploaded
estrain
parents: 15
diff changeset
94 mlst --nopath --threads $num_cores --datadir $mlst_databases.fields.path/pubmlst --blastdb $mlst_databases.fields.path/blast/mlst.fa
2
25a92dfb780a Uploaded
estrain
parents:
diff changeset
95 #if $options.select=="advanced"
25a92dfb780a Uploaded
estrain
parents:
diff changeset
96 #if $options.minid
25a92dfb780a Uploaded
estrain
parents:
diff changeset
97 --minid $options.minid
25a92dfb780a Uploaded
estrain
parents:
diff changeset
98 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
99 #if $options.mincov
25a92dfb780a Uploaded
estrain
parents:
diff changeset
100 --mincov $options.mincov
25a92dfb780a Uploaded
estrain
parents:
diff changeset
101 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
102 #if $options.minscore
25a92dfb780a Uploaded
estrain
parents:
diff changeset
103 --minscore $options.minscore
25a92dfb780a Uploaded
estrain
parents:
diff changeset
104 #end if
25a92dfb780a Uploaded
estrain
parents:
diff changeset
105 #end if
13
59e137488c63 Uploaded
estrain
parents: 12
diff changeset
106 ${outname}.fasta > ${outname}.mlst_raw.tsv;
59e137488c63 Uploaded
estrain
parents: 12
diff changeset
107
21
5083f8406e34 Uploaded
estrain
parents: 16
diff changeset
108 python $__tool_directory__/mlstAddFields.py ${outname}.mlst_raw.tsv $mlst_databases.fields.path/pubmlst > ${outname}.mlst.tsv;
2
25a92dfb780a Uploaded
estrain
parents:
diff changeset
109
25a92dfb780a Uploaded
estrain
parents:
diff changeset
110 python $__tool_directory__/run_fastq_scan.py --fastq ${bwalist} --out fq_out.tab --type ${fqscan};
25a92dfb780a Uploaded
estrain
parents:
diff changeset
111
25a92dfb780a Uploaded
estrain
parents:
diff changeset
112 python $__tool_directory__/sum_mlst.py --fasta ${outname}.fasta --mlst ${outname}.mlst.tsv --med insert.median --fqscan fq_out.tab --out sum_qc.txt
25a92dfb780a Uploaded
estrain
parents:
diff changeset
113
25a92dfb780a Uploaded
estrain
parents:
diff changeset
114 ]]></command>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
115 <inputs>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
116 <conditional name="jobtype">
25a92dfb780a Uploaded
estrain
parents:
diff changeset
117 <param name="select" type="select" label="Select Input">
25a92dfb780a Uploaded
estrain
parents:
diff changeset
118 <option value="fastq_fr">Forward and Reverse FASTQ</option>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
119 <option value="fastq_pair">Paired FASTQ Collection</option>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
120 </param>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
121 <when value="fastq_fr">
25a92dfb780a Uploaded
estrain
parents:
diff changeset
122 <param name="fastq1" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="Forward FASTQ" />
25a92dfb780a Uploaded
estrain
parents:
diff changeset
123 <param name="fastq2" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="Reverse FASTQ" />
25a92dfb780a Uploaded
estrain
parents:
diff changeset
124 </when>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
125 <when value="fastq_pair">
25a92dfb780a Uploaded
estrain
parents:
diff changeset
126 <param name="coll" label="Paired FASTQ" type="data_collection" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" collection_type="paired" />
25a92dfb780a Uploaded
estrain
parents:
diff changeset
127 </when>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
128 </conditional>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
129
25a92dfb780a Uploaded
estrain
parents:
diff changeset
130 <conditional name="options">
25a92dfb780a Uploaded
estrain
parents:
diff changeset
131 <param name="select" type="select" label="Options Type">
25a92dfb780a Uploaded
estrain
parents:
diff changeset
132 <option value="basic">Basic</option>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
133 <option value="advanced">Advanced</option>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
134 </param>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
135 <when value="advanced">
25a92dfb780a Uploaded
estrain
parents:
diff changeset
136 <param name="cores" optional="true" type="integer" label="Number of cores to use (Default=16)" value=""/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
137 <param name="memory" optional="true" type="integer" label="Memory available (Default=32GB)" value=""/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
138 <param name="hash_count" optional="true" type="boolean" label="hash counter"/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
139 <param name="estimated_kmers" optional="true" type="integer" label="Estimated number of unique kmers for bloom filter (Default=100)" value=""/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
140 <param name="skip" optional="true" type="boolean" label="skip bloom filter, use estimate kmers as the hash"/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
141 <param name="kmer" optional="true" type="integer" label="Minimal kmer length for assembly (Default=21)" value=""/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
142 <param name="min_count" optional="true" type="integer" label="Minimal count for kmers retained for comparing alternate choices" value=""/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
143 <param name="max_kmer_count" optional="true" type="integer" label="Minimum acceptable average count for estimating the maximal kmer length in reads" value=""/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
144 <param name="vector_percent" optional="true" type="float" label="Count for vectors as a fraction of the read number (0-1,1=disabled)" value="">
25a92dfb780a Uploaded
estrain
parents:
diff changeset
145 <validator type="in_range" message="Must be float(0,1)." min="0" max="1"/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
146 </param>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
147 <param name="insert_size" optional="true" type="integer" label="Expected insert size for paired reads" value=""/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
148 <param name="steps" optional="true" type="integer" label="Number of assembly iterations from minimal to maximal kmer length in reads (Default=11)" value=""/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
149 <param name="fraction" optional="true" type="float" label="Maximum noise to signal ratio acceptable for extension (Default=0.1)" value="">
25a92dfb780a Uploaded
estrain
parents:
diff changeset
150 <validator type="in_range" message="Must be float(0,1)." min="0" max="1"/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
151 </param>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
152 <param name="max_snp_len" optional="true" type="integer" label="Maximal snp length (Default=150)" value=""/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
153 <param name="min_contig" optional="true" type="integer" label="Minimal contig length reported in output (Default=200)" value=""/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
154 <param name="allow_snps" optional="true" type="boolean" label="Turn on SNP discovery (Default=false)"/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
155 <param name="mincov" type="integer" label="Minimum DNA %coverage" value="10" min="0" max="100" help="Minimum DNA %coverage to report partial allele at all (default 10, must be between 0-100)" optional="true" />
25a92dfb780a Uploaded
estrain
parents:
diff changeset
156 <param name="minid" type="integer" label="Minimum DNA %identity" value="95" min="0" max="100" help="Minimum DNA %identity of full allele to consider 'similar' (default 95, must be between 0-100)" optional="true" />
25a92dfb780a Uploaded
estrain
parents:
diff changeset
157 <param name="minscore" type="integer" label="Minimum score to match scheme" value="50" min="0" max="100" help="Minimum score out of 100 to match a scheme" optional="true" />
25a92dfb780a Uploaded
estrain
parents:
diff changeset
158 </when>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
159 <when value="basic"/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
160 </conditional>
16
b07d5fec5942 Uploaded
estrain
parents: 15
diff changeset
161
b07d5fec5942 Uploaded
estrain
parents: 15
diff changeset
162 <param name="mlst_databases" label="Select a mlst database" type="select">
b07d5fec5942 Uploaded
estrain
parents: 15
diff changeset
163 <options from_data_table="mlst">
b07d5fec5942 Uploaded
estrain
parents: 15
diff changeset
164 <validator message="No database is available" type="no_options" />
b07d5fec5942 Uploaded
estrain
parents: 15
diff changeset
165 </options>
b07d5fec5942 Uploaded
estrain
parents: 15
diff changeset
166 </param>
b07d5fec5942 Uploaded
estrain
parents: 15
diff changeset
167
2
25a92dfb780a Uploaded
estrain
parents:
diff changeset
168 </inputs>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
169 <outputs>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
170 <data format="fasta" name="results.skesa.fasta" label="${tool.name} on ${on_string}: Contigs" from_work_dir="*.fasta"/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
171 <data format="tabular" name="results.mlst.tsv" label="${tool.name} on ${on_string}: MLST" from_work_dir="*.mlst.tsv"/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
172 <data format="tabular" name="qc_results.tsv" label="${tool.name} on ${on_string}: QC Summary" from_work_dir="*.txt"/>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
173 </outputs>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
174
25a92dfb780a Uploaded
estrain
parents:
diff changeset
175 <help><![CDATA[
25a92dfb780a Uploaded
estrain
parents:
diff changeset
176
25a92dfb780a Uploaded
estrain
parents:
diff changeset
177 ]]></help>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
178 <citations>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
179 <citation type="bibtex">
25a92dfb780a Uploaded
estrain
parents:
diff changeset
180 @misc{pope_dashnow_zobel_holt_raven_schultz_inouye_tomita_2014,
25a92dfb780a Uploaded
estrain
parents:
diff changeset
181 title={skesa: SKESA is a de-novo sequence read assembler for cultured single isolate genomes
25a92dfb780a Uploaded
estrain
parents:
diff changeset
182 based on DeBruijn graphs. It uses conservative heuristics and is designed to
25a92dfb780a Uploaded
estrain
parents:
diff changeset
183 create breaks at repeat regions in the genome. This leads to excellent sequence
25a92dfb780a Uploaded
estrain
parents:
diff changeset
184 quality but not necessarily a large N50 statistic. It is a multi-threaded
25a92dfb780a Uploaded
estrain
parents:
diff changeset
185 application that scales well with the number of processors. For different runs
25a92dfb780a Uploaded
estrain
parents:
diff changeset
186 with the same inputs, including the order of reads, the order and orientation
25a92dfb780a Uploaded
estrain
parents:
diff changeset
187 of contigs in the output is deterministic. },
25a92dfb780a Uploaded
estrain
parents:
diff changeset
188 url={https://github.com/ncbi/ngs-tools/tree/master/tools/skesa/},
25a92dfb780a Uploaded
estrain
parents:
diff changeset
189 author={National Center for Biotechnology Information },
25a92dfb780a Uploaded
estrain
parents:
diff changeset
190 }</citation>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
191
25a92dfb780a Uploaded
estrain
parents:
diff changeset
192 <citation type="bibtex">
25a92dfb780a Uploaded
estrain
parents:
diff changeset
193 @UNPUBLISHED{Seemann2016,
25a92dfb780a Uploaded
estrain
parents:
diff changeset
194 author = "Seemann T",
25a92dfb780a Uploaded
estrain
parents:
diff changeset
195 title = "MLST: Scan contig files against PubMLST typing schemes",
25a92dfb780a Uploaded
estrain
parents:
diff changeset
196 year = "2016",
25a92dfb780a Uploaded
estrain
parents:
diff changeset
197 url = {https://github.com/tseemann/mlst}
25a92dfb780a Uploaded
estrain
parents:
diff changeset
198 }</citation>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
199 </citations>
25a92dfb780a Uploaded
estrain
parents:
diff changeset
200 </tool>