Mercurial > repos > iuc > miniprot
comparison miniprot.xml @ 2:d518cf04b55c draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/miniprot commit bda011acaca773960c85bfa1288fa8b74cfabb15
author | iuc |
---|---|
date | Thu, 06 Apr 2023 09:20:48 +0000 |
parents | ce04c239454b |
children | 52bdc302299b |
comparison
equal
deleted
inserted
replaced
1:ce04c239454b | 2:d518cf04b55c |
---|---|
11 miniprot | 11 miniprot |
12 -t \${GALAXY_SLOTS:-1} | 12 -t \${GALAXY_SLOTS:-1} |
13 #if str($adv.options) == "yes" | 13 #if str($adv.options) == "yes" |
14 $adv.mapping.no_splicing | 14 $adv.mapping.no_splicing |
15 -c $adv.mapping.max_kmer | 15 -c $adv.mapping.max_kmer |
16 -G $adv.mapping.max_intron | |
17 -n $adv.mapping.min_syncmers | 16 -n $adv.mapping.min_syncmers |
18 -m $adv.mapping.min_chain_score | 17 -m $adv.mapping.min_chain_score |
19 -l $adv.mapping.second_round_kmer_size | 18 -l $adv.mapping.second_round_kmer_size |
20 -e $adv.mapping.max_extension | 19 -e $adv.mapping.max_extension |
21 -p $adv.mapping.score_ratio | 20 -p $adv.mapping.score_ratio |
22 -N $adv.mapping.max_secondary_alignments | 21 -N $adv.mapping.max_secondary_alignments |
22 -w $adv.mapping.log_gap_penalty_weight | |
23 -O $adv.alignment.gap_open | 23 -O $adv.alignment.gap_open |
24 -E $adv.alignment.gap_extension | 24 -E $adv.alignment.gap_extension |
25 -J $adv.alignment.intron_open | 25 -J $adv.alignment.intron_open |
26 -C $adv.alignment.non_canonical_splice | 26 -C $adv.alignment.non_canonical_splice |
27 -F $adv.alignment.frameshift | 27 -F $adv.alignment.frameshift |
28 -B $adv.alignment.end_bonus | 28 -B $adv.alignment.end_bonus |
29 -j $adv.alignment.splice_model | |
30 #if str($adv.mapping.intron_size.mode) == 'manual' | |
31 -G $adv.mapping.intron_size.max_intron | |
32 #elif str($adv.mapping.intron_size.mode) == 'auto' | |
33 -I | |
34 #end if | |
35 | |
29 #if str($adv.output.prefix) != 'MP' | 36 #if str($adv.output.prefix) != 'MP' |
30 -P '$adv.output.prefix' | 37 -P '$adv.output.prefix' |
31 #end if | 38 #end if |
32 $adv.output.print_unmapped_proteins | 39 $adv.output.print_unmapped_proteins |
33 --outn=$adv.output.outputs_per_query | 40 --outn=$adv.output.outputs_per_query |
41 --outc=$adv.output.output_fraction_query | |
42 --outs=$adv.output.output_score_least | |
43 $adv.output.output_residue_alignment | |
34 #end if | 44 #end if |
35 #if str($db.dbtype) == 'fasta' | 45 #if str($db.dbtype) == 'fasta' |
36 '$db.genomic_fasta' | 46 '$db.genomic_fasta' |
37 -k $db.kmer_size | 47 -k $db.kmer_size |
38 -s $db.submer_size | 48 -b $db.bits_per_block |
39 -b $db.bits_per_block | 49 -M $db.modimisers |
50 -L $db.min_ORF | |
40 #else | 51 #else |
41 '$db.genomic_db' | 52 '$db.genomic_db' |
42 #end if | 53 #end if |
43 #if str($output_format) == "gff" | 54 #if str($output_format) == "gff" |
44 --gff | 55 --gff |
45 #end if | 56 #else if str($output_format) == "gtf" |
46 '$protein_fasta' | 57 --gtf |
47 >'$output_alignment' | 58 #end if |
59 '$protein_fasta' > '$output_alignment' | |
48 ]]></command> | 60 ]]></command> |
49 <inputs> | 61 <inputs> |
50 <conditional name="db"> | 62 <conditional name="db"> |
51 <param name="dbtype" type="select" label="Database type" help="Build an index from FASTA or use a pre-indexed database"> | 63 <param name="dbtype" type="select" label="Database type" help="Build an index from FASTA or use a pre-indexed database"> |
52 <option value="fasta" selected="true">FASTA</option> | 64 <option value="fasta" selected="true">FASTA</option> |
53 <option value="preindexed">Pre-indexed</option> | 65 <option value="preindexed">Pre-indexed</option> |
54 </param> | 66 </param> |
55 <when value="fasta"> | 67 <when value="fasta"> |
56 <param name="genomic_fasta" type="data" format="fasta,fasta.gz" label="Genomic sequence (FASTA)" help="Genomic contigs / scaffolds to be aligned against in FASTA format" /> | 68 <param name="genomic_fasta" type="data" format="fasta,fasta.gz" label="Genomic sequence (FASTA)" help="Genomic contigs / scaffolds to be aligned against in FASTA format" /> |
57 <param argument="-k" name="kmer_size" type="integer" min="1" value="6" label="K-mer size" /> | 69 <param argument="-k" name="kmer_size" type="integer" min="1" value="6" label="K-mer size for genome-wide indexing" /> |
58 <param argument="-s" name="submer_size" type="integer" min="1" value="4" label="Submer size" help="Submer size (density: 1/(2*(kmer_size-submer_size)+1))" /> | 70 <param argument="-b" name="bits_per_block" type="integer" min="1" value="8" label="Number of bits per bin" help="Miniprot splits the genome into non-overlapping bins of 2^8 bp in size" /> |
59 <param argument="-b" name="bits_per_block" type="integer" min="1" value="8" label="Bits per block" /> | 71 <param argument="-M" name="modimisers" type="integer" value="1" label="Sample k-mers at a rate 1/2**INT" help="Increasing this option reduces peak memory but decreases sensitivity" /> |
72 <param argument="-L" name="min_ORF" type="integer" value="30" label="Minimum ORF length to index" /> | |
60 </when> | 73 </when> |
61 <when value="preindexed"> | 74 <when value="preindexed"> |
62 <!-- refine the datatype here once Miniprot index data type is in Galaxy --> | 75 <!-- refine the datatype here once Miniprot index data type is in Galaxy --> |
63 <param name="genomic_db" type="data" format="binary" label="Pre-indexed genomic database" help="A pre-indexed database built by miniprot" /> | 76 <param name="genomic_db" type="data" format="binary" label="Pre-indexed genomic database" help="A pre-indexed database built by miniprot" /> |
64 </when> | 77 </when> |
65 </conditional> | 78 </conditional> |
66 <param name="protein_fasta" type="data" format="fasta,fasta.gz" label="Protein sequence (FASTA)" help="Protein sequences to be aligned in FASTA format" /> | 79 <param name="protein_fasta" type="data" format="fasta,fasta.gz" label="Protein sequence (FASTA)" help="Protein sequences to be aligned in FASTA format" /> |
67 <param name="output_format" type="select" label="Output format" > | 80 <param name="output_format" type="select" label="Output format" > |
68 <option value="gff" selected="true">GFF3</option> | 81 <option value="gff" selected="true">GFF3</option> |
69 <option value="paf">PAF</option> | 82 <option value="paf">PAF</option> |
83 <option value="gtf">GTF</option> | |
70 </param> | 84 </param> |
71 <conditional name="adv"> | 85 <conditional name="adv"> |
72 <param name="options" type="select" label="Advanced options"> | 86 <param name="options" type="select" label="Advanced options"> |
73 <option value="yes">Show</option> | 87 <option value="yes">Show</option> |
74 <option value="no" selected="true">Hide</option> | 88 <option value="no" selected="true">Hide</option> |
75 </param> | 89 </param> |
76 <when value="yes"> | 90 <when value="yes"> |
77 <section name="mapping" title="Mapping"> | 91 <section name="mapping" title="Mapping"> |
78 <param argument="-S" name="no_splicing" type="boolean" truevalue="-S" falsevalue="" checked="false" label="No splicing" help="No splicing (apply -G1000 -J1000 -e1000)" /> | 92 <param argument="-S" name="no_splicing" type="boolean" truevalue="-S" falsevalue="" checked="false" label="No splicing" help="No splicing (apply -G1000 -J1000 -e1000)" /> |
79 <param argument="-c" name="max_kmer" type="integer" min="1" value="50000" label="Max k-mer occurrences" /> | 93 <param argument="-c" name="max_kmer" type="integer" min="1" value="20000" label="Max k-mer occurrences" /> |
80 <param argument="-G" name="max_intron" type="integer" min="0" value="200000" label="Max intron size" /> | 94 <param argument="-w" name="log_gap_penalty_weight" type="float" min="0" max="1" value="0.75" label="Log gap penalty weight" /> |
81 <!-- the -w option is mentioned in the help text but apparently not implemented: https://github.com/lh3/miniprot/issues/12 --> | 95 <param argument="-n" name="min_syncmers" type="integer" min="1" value="3" label="Minimum number of syncmers in a chain" /> |
82 <!-- <param argument="-w" name="log_gap_penalty_weight" type="float" value="0.75" label="Log gap penalty weight" /> --> | |
83 <param argument="-n" name="min_syncmers" type="integer" min="1" value="5" label="Minimum number of syncmers in a chain" /> | |
84 <param argument="-m" name="min_chain_score" type="integer" min="0" value="0" label="Minimum chaining score" /> | 96 <param argument="-m" name="min_chain_score" type="integer" min="0" value="0" label="Minimum chaining score" /> |
85 <param argument="-l" name="second_round_kmer_size" type="integer" min="1" value="5" label="K-mer size for second round of chaining" /> | 97 <param argument="-l" name="second_round_kmer_size" type="integer" min="1" value="5" label="K-mer size for second round of chaining" /> |
86 <param argument="-e" name="max_extension" type="integer" min="0" value="10000" label="Max extension for second round of chaining" /> | 98 <param argument="-e" name="max_extension" type="integer" min="0" value="10000" label="Max extension for second round of chaining" /> |
87 <param argument="-p" name="score_ratio" type="float" min="0" max="1" value="0.7" label="Minimum secondary-to-primary score ratio" /> | 99 <param argument="-p" name="score_ratio" type="float" min="0" max="1" value="0.7" label="Minimum secondary-to-primary score ratio" /> |
88 <param argument="-N" name="max_secondary_alignments" type="integer" min="0" value="50" label="Max secondary alignments to consider" /> | 100 <param argument="-N" name="max_secondary_alignments" type="integer" min="0" value="30" label="Max secondary alignments to consider" /> |
101 <conditional name="intron_size"> | |
102 <param name="mode" type="select" label="Maximum intron size"> | |
103 <option value="manual" selected="true">Manual</option> | |
104 <option value="auto">Auto (3.6*sqrt)</option> | |
105 </param> | |
106 <when value="manual"> | |
107 <param argument="-G" name="max_intron" type="integer" min="0" value="200000" label="Maximum intron size" /> | |
108 </when> | |
109 <when value="auto" /> | |
110 </conditional> | |
89 </section> | 111 </section> |
90 <section name="alignment" title="Alignment"> | 112 <section name="alignment" title="Alignment"> |
91 <param argument="-O" name="gap_open" type="integer" min="0" value="11" label="Gap open penalty" /> | 113 <param argument="-O" name="gap_open" type="integer" min="0" value="11" label="Gap open penalty" /> |
92 <param argument="-E" name="gap_extension" type="integer" min="0" value="1" label="Gap extension penalty" help="A k-long gap costs open_penalty+k*extension_penalty" /> | 114 <param argument="-E" name="gap_extension" type="integer" min="0" value="1" label="Gap extension penalty" help="A k-long gap costs open_penalty+k*extension_penalty" /> |
93 <param argument="-J" name="intron_open" type="integer" min="0" value="31" label="Intron open penalty" /> | 115 <param argument="-J" name="intron_open" type="integer" min="0" value="29" label="Intron open penalty" /> |
94 <param argument="-C" name="non_canonical_splice" type="integer" min="0" value="11" label="Penalty for non-canonical splicing" /> | 116 <param argument="-C" name="non_canonical_splice" type="float" value="1" label="Weight of splice penalty; 0 to ignore splice signals" /> |
95 <param argument="-F" name="frameshift" type="integer" min="0" value="17" label="Frameshift penalty" /> | 117 <param argument="-F" name="frameshift" type="integer" min="0" value="23" label="Frameshift penalty" /> |
96 <param argument="-B" name="end_bonus" type="integer" min="0" value="5" label="End bonus" /> | 118 <param argument="-B" name="end_bonus" type="integer" min="0" value="5" label="Bonus score for alignment reaching query ends" /> |
119 <param argument="-j" name="splice_model" type="select" label="Splicing model for the target genome" help="2=mammal, 1=general, 0=none" > | |
120 <option value="0" >None: No splicing model (0)</option> | |
121 <option value="1" selected="true">General: Optimal splicing sequence: '|GTR...YAG|' (1)</option> | |
122 <option value="2">Mammal: Optimal splicing sequence: 'G|GTR...YYYNYAG|' (2)</option> | |
123 </param> | |
97 </section> | 124 </section> |
98 <section name="output" title="Output"> | 125 <section name="output" title="Output"> |
99 <param argument="-P" name="prefix" type="text" label="Prefix for IDs in GFF3 output" value="MP"> | 126 <param argument="-P" name="prefix" type="text" label="Prefix for IDs in GFF3 output" value="MP"> |
100 <sanitizer invalid_char=""> | 127 <sanitizer invalid_char=""> |
101 <valid initial="string.ascii_letters,string.digits"> | 128 <valid initial="string.ascii_letters,string.digits"> |
104 </valid> | 131 </valid> |
105 </sanitizer> | 132 </sanitizer> |
106 </param> | 133 </param> |
107 <param argument="-u" name="print_unmapped_proteins" type="boolean" truevalue="-u" falsevalue="" label="Print unmapped proteins" checked="false" /> | 134 <param argument="-u" name="print_unmapped_proteins" type="boolean" truevalue="-u" falsevalue="" label="Print unmapped proteins" checked="false" /> |
108 <param argument="--outn" name="outputs_per_query" type="integer" min="0" value="100" label="Outputs per query" help="The number of outputs will be the minimum of this and the max secondary alignments option" /> | 135 <param argument="--outn" name="outputs_per_query" type="integer" min="0" value="100" label="Outputs per query" help="The number of outputs will be the minimum of this and the max secondary alignments option" /> |
136 <param argument="--aln" name="output_residue_alignment" type="boolean" truevalue="--aln" falsevalue="" checked="false" label="Output residue alignment" help="Only for GFF output" /> | |
137 <param argument="--outs" name="output_score_least" type="float" min="0" max="1" value="0.99" | |
138 label="For each protein, only output alignments with a score higher than 'best_score' multiplied by this value"/> | |
139 <param argument="--outc" name="output_fraction_query" type="float" value="0.1" label="Output if at least this fraction of query is aligned" /> | |
109 </section> | 140 </section> |
110 <param argument="-K" name="query_batch_size" type="integer" min="1" value="2000000" label="Query batch size" /> | 141 <param argument="-K" name="query_batch_size" type="integer" min="1" value="2000000" label="Query batch size" /> |
111 </when> | 142 </when> |
112 <when value="no"> | 143 <when value="no"> |
113 </when> | 144 </when> |
115 </inputs> | 146 </inputs> |
116 <outputs> | 147 <outputs> |
117 <data name="output_alignment" format="gff3" label="Miniprot on ${on_string}"> | 148 <data name="output_alignment" format="gff3" label="Miniprot on ${on_string}"> |
118 <change_format> | 149 <change_format> |
119 <when input="output_format" value="paf" format="paf" /> | 150 <when input="output_format" value="paf" format="paf" /> |
151 <when input="output_format" value="gtf" format="gtf"/> | |
120 </change_format> | 152 </change_format> |
121 </data> | 153 </data> |
122 </outputs> | 154 </outputs> |
123 <tests> | 155 <tests> |
124 <test expect_num_outputs="1"> | 156 <test expect_num_outputs="1"> |
127 <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" /> | 159 <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" /> |
128 </conditional> | 160 </conditional> |
129 <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" /> | 161 <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" /> |
130 <output name="output_alignment" ftype="gff3"> | 162 <output name="output_alignment" ftype="gff3"> |
131 <assert_contents> | 163 <assert_contents> |
132 <has_text text="ID=MP000001;Identity=1.0000;Positive=1.0000;Target=tr|O06302|O06302_MYCTU 1 126" /> | 164 <has_text text="ID=MP000001;Rank=1;Identity=0.3420;Positive=0.5104;Target=tr|I6YGH7|I6YGH7_MYCTU 1 375" /> |
133 <has_text text="ID=MP000359;Identity=0.9811;Positive=1.0000;Target=tr|V5QPR5|V5QPR5_MYCTU 1 53" /> | 165 <has_text text="ID=MP000066;Rank=1;Identity=0.3613;Positive=0.5178;Target=sp|P9WQE5|PPSB_MYCTU 1 1214" /> |
134 </assert_contents> | 166 </assert_contents> |
135 </output> | 167 </output> |
136 </test> | 168 </test> |
137 <test expect_num_outputs="1"> | 169 <test expect_num_outputs="1"> |
138 <conditional name="db"> | 170 <conditional name="db"> |
145 <assert_contents> | 177 <assert_contents> |
146 <has_text text="tr|O06302|O06302_MYCTU" /> | 178 <has_text text="tr|O06302|O06302_MYCTU" /> |
147 <has_text text="cs:Z::29*agcG:3*gtgA:5*ccgA:9*accS:1*gccV:4*cagL:1*gtcS:3*gtcA*gtcI*accA*gccG:8*gccS:2*ggtA:5*gccI*agcG:1*ctgA:4*gccV:5*gggL:1*gtgS:2" /> | 179 <has_text text="cs:Z::29*agcG:3*gtgA:5*ccgA:9*accS:1*gccV:4*cagL:1*gtcS:3*gtcA*gtcI*accA*gccG:8*gccS:2*ggtA:5*gccI*agcG:1*ctgA:4*gccV:5*gggL:1*gtgS:2" /> |
148 </assert_contents> | 180 </assert_contents> |
149 </output> | 181 </output> |
150 </test> | 182 </test> |
151 <test expect_num_outputs="1"> | 183 <test expect_num_outputs="1"> |
152 <conditional name="db"> | 184 <conditional name="db"> |
153 <param name="dbtype" value="fasta" /> | 185 <param name="dbtype" value="fasta" /> |
154 <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" /> | 186 <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" /> |
155 </conditional> | 187 </conditional> |
163 <assert_contents> | 195 <assert_contents> |
164 <has_text text="##gff-version 3" /> | 196 <has_text text="##gff-version 3" /> |
165 </assert_contents> | 197 </assert_contents> |
166 </output> | 198 </output> |
167 </test> | 199 </test> |
200 | |
201 <test expect_num_outputs="1"> | |
202 <conditional name="db"> | |
203 <param name="dbtype" value="fasta"></param> | |
204 <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta"></param> | |
205 </conditional> | |
206 <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta"></param> | |
207 <param name="output_format" value="gtf"></param> | |
208 <conditional name="adv"> | |
209 <param name="options" value="yes"></param> | |
210 <param name="second_round_kmer_size" value="32"></param> | |
211 </conditional> | |
212 <output name="output_alignment" ftype="gtf"> | |
213 <assert_contents> | |
214 <has_text text="NC_000962.3" /> | |
215 <has_text text='transcript_id "MPT000004"; gene_id "MPG000004"' /> | |
216 </assert_contents> | |
217 </output> | |
218 </test> | |
219 | |
220 | |
168 </tests> | 221 </tests> |
169 <help><![CDATA[ | 222 <help><![CDATA[ |
170 miniprot_ rapidly aligns a protein sequence against a genome with affine gap penalty, splicing and frameshift. | 223 miniprot_ rapidly aligns a protein sequence against a genome with affine gap penalty, splicing and frameshift. |
171 It is primarily intended for annotating protein-coding genes in a new species using known genes from other species. | 224 It is primarily intended for annotating protein-coding genes in a new species using known genes from other species. |
172 | 225 |
175 | 228 |
176 For details of the algorithm and some insight into how parameters can be tuned see this overview_. | 229 For details of the algorithm and some insight into how parameters can be tuned see this overview_. |
177 | 230 |
178 .. _miniprot: https://github.com/lh3/miniprot | 231 .. _miniprot: https://github.com/lh3/miniprot |
179 .. _overview: https://github.com/lh3/miniprot#algorithm-overview | 232 .. _overview: https://github.com/lh3/miniprot#algorithm-overview |
180 ]]></help> | 233 ]]></help> |
181 <citations> | 234 <expand macro="citation"></expand> |
182 <citation type="bibtex"><![CDATA[ | |
183 @misc{Li2022, | |
184 author = {Li, Heng}, | |
185 title = {miniprot}, | |
186 year = {2022}, | |
187 publisher = {GitHub}, | |
188 journal = {GitHub repository}, | |
189 howpublished = {\url{https://github.com/lh3/miniprot}}, | |
190 commit = {b442b7a6b60dbd15f460ea9af75fa0b7293d4a8c} | |
191 } | |
192 ]]></citation> | |
193 </citations> | |
194 </tool> | 235 </tool> |