Mercurial > repos > iuc > miniprot
changeset 1:ce04c239454b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/miniprot commit fbe24c1f66f23448d48a61c84a93cb73e0dbc779
author | iuc |
---|---|
date | Fri, 23 Sep 2022 22:35:23 +0000 |
parents | ef712a5e9834 |
children | d518cf04b55c |
files | macros.xml miniprot.xml |
diffstat | 2 files changed, 43 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Mon Sep 19 12:30:10 2022 +0000 +++ b/macros.xml Fri Sep 23 22:35:23 2022 +0000 @@ -1,3 +1,3 @@ <macros> - <token name="@TOOL_VERSION@">0.2</token> + <token name="@TOOL_VERSION@">0.3</token> </macros>
--- a/miniprot.xml Mon Sep 19 12:30:10 2022 +0000 +++ b/miniprot.xml Fri Sep 23 22:35:23 2022 +0000 @@ -26,6 +26,11 @@ -C $adv.alignment.non_canonical_splice -F $adv.alignment.frameshift -B $adv.alignment.end_bonus + #if str($adv.output.prefix) != 'MP' + -P '$adv.output.prefix' + #end if + $adv.output.print_unmapped_proteins + --outn=$adv.output.outputs_per_query #end if #if str($db.dbtype) == 'fasta' '$db.genomic_fasta' @@ -73,21 +78,35 @@ <param argument="-S" name="no_splicing" type="boolean" truevalue="-S" falsevalue="" checked="false" label="No splicing" help="No splicing (apply -G1000 -J1000 -e1000)" /> <param argument="-c" name="max_kmer" type="integer" min="1" value="50000" label="Max k-mer occurences" /> <param argument="-G" name="max_intron" type="integer" min="0" value="200000" label="Max intron size" /> + <!-- the -w option is mentioned in the help text but apparently not implemented: https://github.com/lh3/miniprot/issues/12 --> + <!-- <param argument="-w" name="log_gap_penalty_weight" type="float" value="0.75" label="Log gap penalty weight" /> --> <param argument="-n" name="min_syncmers" type="integer" min="1" value="5" label="Minimum number of syncmers in a chain" /> <param argument="-m" name="min_chain_score" type="integer" min="0" value="0" label="Minimum chaining score" /> <param argument="-l" name="second_round_kmer_size" type="integer" min="1" value="5" label="K-mer size for second round of chaining" /> <param argument="-e" name="max_extension" type="integer" min="0" value="10000" label="Max extension for second round of chaining" /> - <param argument="-p" name="score_ratio" type="float" min="0" max="1" value="0.5" label="Minimum secondary-to-primary score ratio" /> - <param argument="-N" name="max_secondary_alignments" type="integer" min="0" value="100" label="Max secondary alignments to consider" /> + <param argument="-p" name="score_ratio" type="float" min="0" max="1" value="0.7" label="Minimum secondary-to-primary score ratio" /> + <param 
argument="-N" name="max_secondary_alignments" type="integer" min="0" value="50" label="Max secondary alignments to consider" /> </section> <section name="alignment" title="Alignment"> <param argument="-O" name="gap_open" type="integer" min="0" value="11" label="Gap open penalty" /> <param argument="-E" name="gap_extension" type="integer" min="0" value="1" label="Gap extension penalty" help="A k-long gap costs open_penalty+k*extension_penalty" /> <param argument="-J" name="intron_open" type="integer" min="0" value="31" label="Intron open penalty" /> <param argument="-C" name="non_canonical_splice" type="integer" min="0" value="11" label="Penalty for non-canonical splicing" /> - <param argument="-F" name="frameshift" type="integer" min="0" value="15" label="Frameshift penalty" /> + <param argument="-F" name="frameshift" type="integer" min="0" value="17" label="Frameshift penalty" /> <param argument="-B" name="end_bonus" type="integer" min="0" value="5" label="End bonus" /> </section> + <section name="output" title="Output"> + <param argument="-P" name="prefix" type="text" label="Prefix for IDs in GFF3 output" value="MP"> + <sanitizer invalid_char=""> + <valid initial="string.ascii_letters,string.digits"> + <add value="_" /> + <add value="-" /> + </valid> + </sanitizer> + </param> + <param argument="-u" name="print_unmapped_proteins" type="boolean" truevalue="-u" falsevalue="" label="Print unmapped proteins" checked="false" /> + <param argument="--outn" name="outputs_per_query" type="integer" min="0" value="100" label="Outputs per query" help="The number of outputs will be the minimum of this and the max secondary alignments option" /> + </section> <param argument="-K" name="query_batch_size" type="integer" min="1" value="2000000" label="Query batch size" /> </when> <when value="no"> @@ -111,7 +130,7 @@ <output name="output_alignment" ftype="gff3"> <assert_contents> <has_text text="ID=MP000001;Identity=1.0000;Positive=1.0000;Target=tr|O06302|O06302_MYCTU 1 126" /> - 
<has_text text="Parent=MP000372;Target=tr|V5QPR5|V5QPR5_MYCTU 1 53" /> + <has_text text="ID=MP000359;Identity=0.9811;Positive=1.0000;Target=tr|V5QPR5|V5QPR5_MYCTU 1 53" /> </assert_contents> </output> </test> @@ -151,7 +170,25 @@ miniprot_ rapidly aligns a protein sequence against a genome with affine gap penalty, splicing and frameshift. It is primarily intended for annotating protein-coding genes in a new species using known genes from other species. - **NOTE:** miniprot is in the early stages of development and should be considered experimental at this stage. + While an index of the genome to be mapped to can be built "on the fly", the Miniprot index tool can pre-index a genome + and will result in faster performance if the genome index is reused multiple times. + + For details of the algorithm and some insight into how parameters can be tuned see this overview_. + .. _miniprot: https://github.com/lh3/miniprot + .. _overview: https://github.com/lh3/miniprot#algorithm-overview ]]></help> + <citations> + <citation type="bibtex"><![CDATA[ + @misc{Li2022, + author = {Li, Heng}, + title = {miniprot}, + year = {2022}, + publisher = {GitHub}, + journal = {GitHub repository}, + howpublished = {\url{https://github.com/lh3/miniprot}}, + commit = {b442b7a6b60dbd15f460ea9af75fa0b7293d4a8c} + } + ]]></citation> + </citations> </tool>