Mercurial > repos > iuc > miniprot
changeset 0:ef712a5e9834 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/miniprot commit 931e98e27ac60b189e2dfbb1c99767bd17860c5e
author | iuc |
---|---|
date | Mon, 19 Sep 2022 12:30:10 +0000 |
parents | |
children | ce04c239454b |
files | macros.xml miniprot.xml test-data/input_genome.fasta.gz test-data/input_query.fasta.gz |
diffstat | 4 files changed, 160 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Sep 19 12:30:10 2022 +0000
@@ -0,0 +1,4 @@
+<macros>
+    <!-- Version of the miniprot package this wrapper targets. -->
+    <token name="@TOOL_VERSION@">0.2</token>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/miniprot.xml Mon Sep 19 12:30:10 2022 +0000
@@ -0,0 +1,170 @@
+<?xml version="1.0"?>
+<tool id="miniprot" name="Miniprot align" version="@TOOL_VERSION@+galaxy0" profile="21.05">
+    <description>align a protein sequence against a genome with affine gap penalty, splicing and frameshift</description>
+    <!-- The tool version token used above and below is defined in macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">miniprot</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Advanced flags are only emitted when "Advanced options" is set to "yes".
+        miniprot
+        -t \${GALAXY_SLOTS:-1}
+        #if str($adv.options) == "yes"
+            $adv.mapping.no_splicing
+            -c $adv.mapping.max_kmer
+            -G $adv.mapping.max_intron
+            -n $adv.mapping.min_syncmers
+            -m $adv.mapping.min_chain_score
+            -l $adv.mapping.second_round_kmer_size
+            -e $adv.mapping.max_extension
+            -p $adv.mapping.score_ratio
+            -N $adv.mapping.max_secondary_alignments
+            -O $adv.alignment.gap_open
+            -E $adv.alignment.gap_extension
+            -J $adv.alignment.intron_open
+            -C $adv.alignment.non_canonical_splice
+            -F $adv.alignment.frameshift
+            -B $adv.alignment.end_bonus
+            -K $adv.query_batch_size
+        #end if
+        ## Index-building flags (-k/-s/-b) are only passed when the target is a FASTA file.
+        #if str($db.dbtype) == 'fasta'
+            '$db.genomic_fasta'
+            -k $db.kmer_size
+            -s $db.submer_size
+            -b $db.bits_per_block
+        #else
+            '$db.genomic_db'
+        #end if
+        ## Without this flag the output is labelled PAF (see change_format in outputs).
+        #if str($output_format) == "gff"
+            --gff
+        #end if
+        '$protein_fasta'
+        >'$output_alignment'
+    ]]></command>
+    <inputs>
+        <!-- Target genome: either a FASTA file to build an index from, or a pre-built index. -->
+        <conditional name="db">
+            <param name="dbtype" type="select" label="Database type" help="Build an index from FASTA or use a pre-indexed database">
+                <option value="fasta" selected="true">FASTA</option>
+                <option value="preindexed">Pre-indexed</option>
+            </param>
+            <when value="fasta">
+                <param name="genomic_fasta" type="data" format="fasta,fasta.gz" label="Genomic sequence (FASTA)" help="Genomic contigs / scaffolds to be aligned against in FASTA format" />
+                <param argument="-k" name="kmer_size" type="integer" min="1" value="6" label="K-mer size" />
+                <param argument="-s" name="submer_size" type="integer" min="1" value="4" label="Submer size" help="Submer size (density: 1/(2*(kmer_size-submer_size)+1))" />
+                <param argument="-b" name="bits_per_block" type="integer" min="1" value="8" label="Bits per block" />
+            </when>
+            <when value="preindexed">
+                <!-- refine the datatype here once Miniprot index data type is in Galaxy -->
+                <param name="genomic_db" type="data" format="binary" label="Pre-indexed genomic database" help="A pre-indexed database built by miniprot" />
+            </when>
+        </conditional>
+        <param name="protein_fasta" type="data" format="fasta,fasta.gz" label="Protein sequence (FASTA)" help="Protein sequences to be aligned in FASTA format" />
+        <param name="output_format" type="select" label="Output format">
+            <option value="gff" selected="true">GFF3</option>
+            <option value="paf">PAF</option>
+        </param>
+        <!-- When shown, every advanced parameter below is passed explicitly on the command line. -->
+        <conditional name="adv">
+            <param name="options" type="select" label="Advanced options">
+                <option value="yes">Show</option>
+                <option value="no" selected="true">Hide</option>
+            </param>
+            <when value="yes">
+                <section name="mapping" title="Mapping">
+                    <param argument="-S" name="no_splicing" type="boolean" truevalue="-S" falsevalue="" checked="false" label="No splicing" help="No splicing (apply -G1000 -J1000 -e1000)" />
+                    <param argument="-c" name="max_kmer" type="integer" min="1" value="50000" label="Max k-mer occurrences" />
+                    <param argument="-G" name="max_intron" type="integer" min="0" value="200000" label="Max intron size" />
+                    <param argument="-n" name="min_syncmers" type="integer" min="1" value="5" label="Minimum number of syncmers in a chain" />
+                    <param argument="-m" name="min_chain_score" type="integer" min="0" value="0" label="Minimum chaining score" />
+                    <param argument="-l" name="second_round_kmer_size" type="integer" min="1" value="5" label="K-mer size for second round of chaining" />
+                    <param argument="-e" name="max_extension" type="integer" min="0" value="10000" label="Max extension for second round of chaining" />
+                    <param argument="-p" name="score_ratio" type="float" min="0" max="1" value="0.5" label="Minimum secondary-to-primary score ratio" />
+                    <param argument="-N" name="max_secondary_alignments" type="integer" min="0" value="100" label="Max secondary alignments to consider" />
+                </section>
+                <section name="alignment" title="Alignment">
+                    <param argument="-O" name="gap_open" type="integer" min="0" value="11" label="Gap open penalty" />
+                    <param argument="-E" name="gap_extension" type="integer" min="0" value="1" label="Gap extension penalty" help="A k-long gap costs open_penalty+k*extension_penalty" />
+                    <param argument="-J" name="intron_open" type="integer" min="0" value="31" label="Intron open penalty" />
+                    <param argument="-C" name="non_canonical_splice" type="integer" min="0" value="11" label="Penalty for non-canonical splicing" />
+                    <param argument="-F" name="frameshift" type="integer" min="0" value="15" label="Frameshift penalty" />
+                    <param argument="-B" name="end_bonus" type="integer" min="0" value="5" label="End bonus" />
+                </section>
+                <param argument="-K" name="query_batch_size" type="integer" min="1" value="2000000" label="Query batch size" />
+            </when>
+            <when value="no" />
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- GFF3 by default; the datatype switches to PAF when that output format is selected. -->
+        <data name="output_alignment" format="gff3" label="Miniprot on ${on_string}">
+            <change_format>
+                <when input="output_format" value="paf" format="paf" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Default settings, GFF3 output -->
+        <test expect_num_outputs="1">
+            <conditional name="db">
+                <param name="dbtype" value="fasta" />
+                <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" />
+            </conditional>
+            <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" />
+            <output name="output_alignment" ftype="gff3">
+                <assert_contents>
+                    <has_text text="ID=MP000001;Identity=1.0000;Positive=1.0000;Target=tr|O06302|O06302_MYCTU 1 126" />
+                    <has_text text="Parent=MP000372;Target=tr|V5QPR5|V5QPR5_MYCTU 1 53" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- PAF output -->
+        <test expect_num_outputs="1">
+            <conditional name="db">
+                <param name="dbtype" value="fasta" />
+                <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" />
+            </conditional>
+            <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" />
+            <param name="output_format" value="paf" />
+            <output name="output_alignment" ftype="paf">
+                <assert_contents>
+                    <has_text text="tr|O06302|O06302_MYCTU" />
+                    <has_text text="cs:Z::29*agcG:3*gtgA:5*ccgA:9*accS:1*gccV:4*cagL:1*gtcS:3*gtcA*gtcI*accA*gccG:8*gccS:2*ggtA:5*gccI*agcG:1*ctgA:4*gccV:5*gggL:1*gtgS:2" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Advanced options enabled with a non-default second-round k-mer size -->
+        <test expect_num_outputs="1">
+            <conditional name="db">
+                <param name="dbtype" value="fasta" />
+                <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" />
+            </conditional>
+            <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" />
+            <param name="output_format" value="gff" />
+            <conditional name="adv">
+                <param name="options" value="yes" />
+                <section name="mapping">
+                    <param name="second_round_kmer_size" value="32" />
+                </section>
+            </conditional>
+            <output name="output_alignment" ftype="gff3">
+                <assert_contents>
+                    <has_text text="##gff-version 3" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+        miniprot_ rapidly aligns a protein sequence against a genome with affine gap penalty, splicing and frameshift.
+        It is primarily intended for annotating protein-coding genes in a new species using known genes from other species.
+
+        **NOTE:** miniprot is in the early stages of development and should be considered experimental at this stage.
+
+        .. _miniprot: https://github.com/lh3/miniprot
+    ]]></help>
+</tool>