changeset 0:5f6ebc3103ee draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaeuk commit a1b87b5a34d0f3971f0110a89f5e87a5937dc8d2"
author iuc
date Tue, 04 Aug 2020 15:43:31 -0400
parents
children f91548912113
files metaeuk_easy_predict.xml test-data/contigs.fna test-data/output.fasta test-data/proteins.faa
diffstat 4 files changed, 110 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaeuk_easy_predict.xml	Tue Aug 04 15:43:31 2020 -0400
@@ -0,0 +1,92 @@
+<tool id="metaeuk_easy_predict" name="MetaEuk Easy Predict" version="@TOOL_VERSION@+galaxy0">
+    <description>High-throughput gene discovery and annotation for large-scale eukaryotic metagenomics</description>
+    <macros>
+        <token name="@TOOL_VERSION@">2.ddf2742</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">metaeuk</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+        mkdir -p ./tmp &&
+        metaeuk easy-predict
+            '${contigs}'
+            '${query}'
+            output.fasta
+            './tmp/metaeuk_working'
+            --threads \${GALAXY_SLOTS:-1}
+            --min-length '${min_length}'
+            -e '${segment_eval}'
+            --metaeuk-eval '${metaeuk_eval}'
+            --metaeuk-tcov '${metaeuk_tcov}'
+        #if $adv.adv_options == 'yes'
+            --max-intron '${adv.max_intron}'
+            --min-intron '${adv.min_intron}'
+        #end if
+        && mv output.fasta '${output}'
+    ]]></command>
+    <inputs>
+        <param name="contigs" type="data" format="fasta" label="Contigs to search" />
+        <param name="query" type="data" format="fasta" label="Proteins to search against contigs" />
+        <param argument="-e" name="segment_eval" type="float" value="100" min="0" label="Maximum e-value of individual match segment" />
+        <param argument="--min-length" name="min_length" type="integer" value="15" min="0" label="Minimum number of codons in predicted open reading frame" />
+        <param argument="--metaeuk-eval" name="metaeuk_eval" type="float" value="0.001" min="0" label="Maximum e-value of combined exon set" />
+        <param argument="--metaeuk-tcov" name="metaeuk_tcov" type="float" value="0.5" min="0" max="1" label="Minimum length ratio of combined exon set to target" />
+        <conditional name="adv">
+            <param type="select" name="adv_options" label="Show advanced options">
+                <option value="yes">Yes</option>
+                <option value="no" selected="true">No</option>
+            </param>
+            <when value="yes">
+                <param argument="--max-intron" name="max_intron" value="10000" min="0" type="integer" label="Maximum intron size" />
+                <param argument="--min-intron" name="min_intron" value="15" min="0" type="integer" label="Minimum intron size" />
+            </when>
+            <!-- No extra parameters: the intron flags are omitted, so MetaEuk's own defaults apply. -->
+            <when value="no" />
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output" format="fasta" />
+    </outputs>
+
+    <tests>
+        <!-- Defaults only: advanced options stay off, so no intron flags are passed. -->
+        <test>
+            <param name="contigs" value="contigs.fna" ftype="fasta" />
+            <param name="query" value="proteins.faa" ftype="fasta" />
+            <output name="output" value="output.fasta" ftype="fasta" />
+        </test>
+        <!-- Advanced options on with a smaller maximum intron; the flag must reach the command line. -->
+        <test>
+            <param name="contigs" value="contigs.fna" ftype="fasta" />
+            <param name="query" value="proteins.faa" ftype="fasta" />
+            <conditional name="adv">
+                <param name="adv_options" value="yes" />
+                <param name="max_intron" value="1000" />
+            </conditional>
+            <assert_command><has_text text="--max-intron '1000'" /></assert_command>
+            <output name="output" value="output.fasta" ftype="fasta" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+    MetaEuk_ is a modular toolkit designed for large-scale gene discovery and
+    annotation in eukaryotic metagenomic contigs. MetaEuk combines the fast and
+    sensitive homology search capabilities of MMseqs2_ with a dynamic programming
+    procedure to recover optimal exon sets. It reduces redundancies in multiple
+    discoveries of the same gene and resolves conflicting gene predictions on
+    the same strand.
+
+    This tool implements the easy-predict command from metaeuk, which combines
+    metaeuk modules into a pipeline for protein alignment prediction. Input is
+    the contigs you want to search for protein hits and the proteins you want
+    to search against those contigs. Output is FASTA format predicted ORFs, with
+    exons annotated in the header according to the metaeuk header format_.
+
+    .. _MetaEuk: https://github.com/soedinglab/metaeuk
+    .. _MMseqs2: https://github.com/soedinglab/MMseqs2
+    .. _format: https://github.com/soedinglab/metaeuk#the-metaeuk-header
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1186/s40168-020-00808-x</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/contigs.fna	Tue Aug 04 15:43:31 2020 -0400
@@ -0,0 +1,4 @@
+>my_multi_exon_contig_1
+tcgtggagcgccgaggcgaaggatctgaggtcaccggcgctagaccgtctccgaccgtacattggagcacgacatgacctaagttcgtgataaatccaacATGACTTTATGCGCAGGTTTTGACCCTAATCGATTTTTGCCCATCGTTCTCGACGTTGGCACCAACAATGAAACCCATCGTAAAAATCATCAATACATGGGTTTAAGAAAGGACCGTGTTCACGGTGAGCAGTATGACATCTTTTTGGAGAACGTTATTAAAGCCATTCGTGAAGTCTTTCCCGAGGCCTTTATTCACTTTGAGGATTTCGGACTTAAAAATGCCAAAAGGATTTTAGACCACTATCGTCCTAATATTGCCTGTTTTAACGATGATATCCAGGGCACCGGTGCTGTAGCACTGGCCGCCATTATAGGCGCCCTTCATGTTtatcctcctgaactagcacatgtaattctgggtcgttactagggtaatctACGAAGTCTCCCTTAACCGAGCAGCGCATCATGATCTTTGGTGCTGGTACTGCTGGTGTTGGTATCGCCAACCAAATTGTCGCTGGTATGGTGACAGATGGCCTTTCATTAGGTAAGGCTAGAGATAATCTTTTCATGATTGATCGATGCGGTTTGCTTCTGGAGAGACATGCTAAGATTGCTACTGATGGACAAAAGCCATTTTTGAAGAAGGACTCAGACTTTAAGGAAGTCCCTTCTGGAGACATTGATTTAGAGACTGCTATTTCACTAATCAAACCCACTGTTCTTCTGGGGTGCTCCGGTCAACCTGGAAAATTTACAGAAAAGGCCATTCGTGAAATGAGCAAGCATGTCAAACATCCCATCATCTTCCCAATCTCTAACCCCACCACTCTCATGctttgattagtttttacggcttctgttccacggttcgacccggcgagctttactggcaacgtatacacccccctcgtaccGAAGCAAAGCCCGTTCAAATTGACGAATGGTCTAATGGTAAAGCTTTGATGGCAACTGGTTCTCCACTTCCTCCTCTCACACGTAATGGTAAAGAATATGTGATTTCTCAATGCAATAATGCTCTTCTTTACCCTGCTCTAGGTGTTGCATGTGTGTTATCCCGTTGCAAATTGTTGAGCggtttctgagcccctgccgatagagaccactgagatggtgatctcggaaacaacgaatac
+>my_multi_exon_contig_2
+tcgtggagcgccgaggcgaaggatctgaggtcaccggcgctagaccgtctccgaccgtacattggagcacgacatgacctaagttcgtgataaatccaacATGACTTTATGCGCAGGTTTTGACCCTAATCGATTTTTGCCCATCGTTCTCGACGTTGGCACCAACAATGAAACCCATCGTAAAAATCATCAATACATGGGTTTAAGAAAGGACCGTGTTCACGGTGAGCAGTATGACATCTTTTTGGAGAACGTTATTAAAGCCATTCGTGAAGTCTTTCCCGAGGCCTTTATTCACTTTGAGGATTTCGGACTTAAAAATGCCAAAAGGATTTTAGACCACTATCGTCCTAATATTGCCTGTTTTAACGATGATATCCAGGGCACCGGTGCTGTAGCACTGGCCGCCATTATAGGCGCCCTTCATGTTtatcctcctgaactagcacatgtaattctgggtcgttactagggtaatctGATGGAATGCTGAAAGCAGCTTCCGATGCTTTGGCCACTGTTCCTAGATCTTTGTTTGTCGCCGATGAAGCTCTCTTGCCAGATCTGGACAATGCTCGAGAAATCTCTCGTCACATCGTTTTTGCGGTTTTGAAGCAAGCCATTTCTGAAGGAATGAGCACAGTGGATTTACCCAAAGATGATGCCAAGTTGAAGGAATGGATTATTGAACGAGAATGGAACCCTGAATACAGGAATTTTGTAgtaggctgccccatctcttaggtatgtaaagccgccaaattaacggtccagaacgagctgtgggatggttcgtagtggcagatgaacggg
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.fasta	Tue Aug 04 15:43:31 2020 -0400
@@ -0,0 +1,4 @@
+>identical_all_3_exons|my_multi_exon_contig_1|+|629|7.362e-187|3|100|1141|100[100]:429[429]:330[330]|480[480]:881[881]:402[402]|944[968]:1141[1141]:198[174]
+ATGACTTTATGCGCAGGTTTTGACCCTAATCGATTTTTGCCCATCGTTCTCGACGTTGGCACCAACAATGAAACCCATCGTAAAAATCATCAATACATGGGTTTAAGAAAGGACCGTGTTCACGGTGAGCAGTATGACATCTTTTTGGAGAACGTTATTAAAGCCATTCGTGAAGTCTTTCCCGAGGCCTTTATTCACTTTGAGGATTTCGGACTTAAAAATGCCAAAAGGATTTTAGACCACTATCGTCCTAATATTGCCTGTTTTAACGATGATATCCAGGGCACCGGTGCTGTAGCACTGGCCGCCATTATAGGCGCCCTTCATGTTACGAAGTCTCCCTTAACCGAGCAGCGCATCATGATCTTTGGTGCTGGTACTGCTGGTGTTGGTATCGCCAACCAAATTGTCGCTGGTATGGTGACAGATGGCCTTTCATTAGGTAAGGCTAGAGATAATCTTTTCATGATTGATCGATGCGGTTTGCTTCTGGAGAGACATGCTAAGATTGCTACTGATGGACAAAAGCCATTTTTGAAGAAGGACTCAGACTTTAAGGAAGTCCCTTCTGGAGACATTGATTTAGAGACTGCTATTTCACTAATCAAACCCACTGTTCTTCTGGGGTGCTCCGGTCAACCTGGAAAATTTACAGAAAAGGCCATTCGTGAAATGAGCAAGCATGTCAAACATCCCATCATCTTCCCAATCTCTAACCCCACCACTCTCATGAAGCCCGTTCAAATTGACGAATGGTCTAATGGTAAAGCTTTGATGGCAACTGGTTCTCCACTTCCTCCTCTCACACGTAATGGTAAAGAATATGTGATTTCTCAATGCAATAATGCTCTTCTTTACCCTGCTCTAGGTGTTGCATGTGTGTTATCCCGTTGCAAATTGTTGAGC
+>identical_all_2_exons|my_multi_exon_contig_2|+|402|1.588e-118|2|100|722|100[100]:429[429]:330[330]|480[480]:722[722]:243[243]
+ATGACTTTATGCGCAGGTTTTGACCCTAATCGATTTTTGCCCATCGTTCTCGACGTTGGCACCAACAATGAAACCCATCGTAAAAATCATCAATACATGGGTTTAAGAAAGGACCGTGTTCACGGTGAGCAGTATGACATCTTTTTGGAGAACGTTATTAAAGCCATTCGTGAAGTCTTTCCCGAGGCCTTTATTCACTTTGAGGATTTCGGACTTAAAAATGCCAAAAGGATTTTAGACCACTATCGTCCTAATATTGCCTGTTTTAACGATGATATCCAGGGCACCGGTGCTGTAGCACTGGCCGCCATTATAGGCGCCCTTCATGTTGATGGAATGCTGAAAGCAGCTTCCGATGCTTTGGCCACTGTTCCTAGATCTTTGTTTGTCGCCGATGAAGCTCTCTTGCCAGATCTGGACAATGCTCGAGAAATCTCTCGTCACATCGTTTTTGCGGTTTTGAAGCAAGCCATTTCTGAAGGAATGAGCACAGTGGATTTACCCAAAGATGATGCCAAGTTGAAGGAATGGATTATTGAACGAGAATGGAACCCTGAATACAGGAATTTTGTA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteins.faa	Tue Aug 04 15:43:31 2020 -0400
@@ -0,0 +1,10 @@
+>identical_all_3_exons
+MTLCAGFDPNRFLPIVLDVGTNNETHRKNHQYMGLRKDRVHGEQYDIFLENVIKAIREVFPEAFIHFEDFGLKNAKRILDHYRPNIACFNDDIQGTGAVALAAIIGALHVTKSPLTEQRIMIFGAGTAGVGIANQIVAGMVTDGLSLGKARDNLFMIDRCGLLLERHAKIATDGQKPFLKKDSDFKEVPSGDIDLETAISLIKPTVLLGCSGQPGKFTEKAIREMSKHVKHPIIFPISNPTTLMEAKPVQIDEWSNGKALMATGSPLPPLTRNGKEYVISQCNNALLYPALGVACVLSRCKLLS
+>identical_all_2_exons
+MTLCAGFDPNRFLPIVLDVGTNNETHRKNHQYMGLRKDRVHGEQYDIFLENVIKAIREVFPEAFIHFEDFGLKNAKRILDHYRPNIACFNDDIQGTGAVALAAIIGALHVDGMLKAASDALATVPRSLFVADEALLPDLDNAREISRHIVFAVLKQAISEGMSTVDLPKDDAKLKEWIIEREWNPEYRNFV
+>identical_exon1
+MTLCAGFDPNRFLPIVLDVGTNNETHRKNHQYMGLRKDRVHGEQYDIFLENVIKAIREVFPEAFIHFEDFGLKNAKRILDHYRPNIACFNDDIQGTGAVALAAIIGALHV
+>identical_exon2
+TKSPLTEQRIMIFGAGTAGVGIANQIVAGMVTDGLSLGKARDNLFMIDRCGLLLERHAKIATDGQKPFLKKDSDFKEVPSGDIDLETAISLIKPTVLLGCSGQPGKFTEKAIREMSKHVKHPIIFPISNPTTLM
+>identical_exon4
+DGMLKAASDALATVPRSLFVADEALLPDLDNAREISRHIVFAVLKQAISEGMSTVDLPKDDAKLKEWIIEREWNPEYRNFV
\ No newline at end of file