Mercurial > repos > iuc > metaeuk_easy_predict
changeset 0:5f6ebc3103ee draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaeuk commit a1b87b5a34d0f3971f0110a89f5e87a5937dc8d2"
author | iuc |
---|---|
date | Tue, 04 Aug 2020 15:43:31 -0400 |
parents | |
children | f91548912113 |
files | metaeuk_easy_predict.xml test-data/contigs.fna test-data/output.fasta test-data/proteins.faa |
diffstat | 4 files changed, 110 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/metaeuk_easy_predict.xml Tue Aug 04 15:43:31 2020 -0400 @@ -0,0 +1,92 @@ +<tool id="metaeuk_easy_predict" name="MetaEuk Easy Predict" version="@TOOL_VERSION@+galaxy0"> + <description>High-throughput gene discovery and annotation for large-scale eukaryotic metagenomics</description> + <macros> + <token name="@TOOL_VERSION@">2.ddf2742</token> + </macros> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">metaeuk</requirement> + </requirements> + <!-- Creates ./tmp, hands ./tmp/metaeuk_working to metaeuk as its working directory, then moves output.fasta onto the Galaxy output dataset. --> <command detect_errors="aggressive"><![CDATA[ + mkdir -p ./tmp && + metaeuk easy-predict + '$contigs' + '$query' + output.fasta + "./tmp/metaeuk_working" + --threads \${GALAXY_SLOTS:-1} + --min-length '${min_length}' + -e '${segment_eval}' + --metaeuk-eval '${metaeuk_eval}' + --metaeuk-tcov '${metaeuk_tcov}' + #if $adv.adv_options == "yes" + --max-intron '${adv.max_intron}' + --min-intron '${adv.min_intron}' + #end if + && mv output.fasta '$output' + ]]></command> + <inputs> + <param name="contigs" type="data" format="fasta" label="Contigs to search for protein hits" /> + <param name="query" type="data" format="fasta" label="Proteins to search against contigs" /> + <param argument="-e" name="segment_eval" type="float" value="100" label="Maximum e-value of individual match segment" /> + <param argument="--min-length" name="min_length" type="integer" value="15" label="Minimum number of codons in predicted open reading frame" /> + <param argument="--metaeuk-eval" name="metaeuk_eval" type="float" value="0.001" label="Maximum e-value of combined exon set" /> + <param argument="--metaeuk-tcov" name="metaeuk_tcov" type="float" value="0.5" min="0" max="1" label="Minimum length ratio of combined exon set to target" /> + <conditional name="adv"> + <param type="select" name="adv_options" label="Show advanced options"> + <option value="yes">Yes</option> + <option value="no" selected="true">No</option> + </param> + <when value="yes"> + <param argument="--max-intron" name="max_intron" 
value="10000" type="integer" label="Maximum intron size" /> + <param argument="--min-intron" name="min_intron" value="15" type="integer" label="Minimum intron size" /> + </when> + <when value="no"> + </when> + </conditional> + </inputs> + <outputs> + <data name="output" format="fasta" /> + </outputs> + + <tests> + <test> + <param name="contigs" ftype="fasta" value="contigs.fna" /> + <param name="query" ftype="fasta" value="proteins.faa" /> + <output name="output" ftype="fasta" value="output.fasta" /> + </test> + <test> + <param name="contigs" ftype="fasta" value="contigs.fna" /> + <param name="query" ftype="fasta" value="proteins.faa" /> + <conditional name="adv"> + <param name="adv_options" value="yes" /> + <param name="max_intron" value="1000" /> + </conditional> + <assert_command> + <has_text text="--max-intron '1000'" /> + </assert_command> + <output name="output" ftype="fasta" value="output.fasta" /> + </test> + </tests> + <help><![CDATA[ + + MetaEuk_ is a modular toolkit designed for large-scale gene discovery and + annotation in eukaryotic metagenomic contigs. MetaEuk combines the fast and + sensitive homology search capabilities of MMseqs2_ with a dynamic programming + procedure to recover optimal exon sets. It reduces redundancies in multiple + discoveries of the same gene and resolves conflicting gene predictions on + the same strand. + + This tool implements the easy-predict command from metaeuk, which combines + metaeuk modules into a pipeline for protein alignment prediction. Input is + the contigs you want to search for protein hits and the proteins you want + to search against those contigs. Output is FASTA format predicted ORFs, with + exons annotated in the header according to the metaeuk header format_. + + .. _MetaEuk: https://github.com/soedinglab/metaeuk + .. _MMseqs2: https://github.com/soedinglab/MMseqs2 + .. 
_format: https://github.com/soedinglab/metaeuk#the-metaeuk-header + ]]></help> + <citations> + <citation type="doi">10.1186/s40168-020-00808-x</citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/contigs.fna Tue Aug 04 15:43:31 2020 -0400 @@ -0,0 +1,4 @@ +>my_multi_exon_contig_1 +tcgtggagcgccgaggcgaaggatctgaggtcaccggcgctagaccgtctccgaccgtacattggagcacgacatgacctaagttcgtgataaatccaacATGACTTTATGCGCAGGTTTTGACCCTAATCGATTTTTGCCCATCGTTCTCGACGTTGGCACCAACAATGAAACCCATCGTAAAAATCATCAATACATGGGTTTAAGAAAGGACCGTGTTCACGGTGAGCAGTATGACATCTTTTTGGAGAACGTTATTAAAGCCATTCGTGAAGTCTTTCCCGAGGCCTTTATTCACTTTGAGGATTTCGGACTTAAAAATGCCAAAAGGATTTTAGACCACTATCGTCCTAATATTGCCTGTTTTAACGATGATATCCAGGGCACCGGTGCTGTAGCACTGGCCGCCATTATAGGCGCCCTTCATGTTtatcctcctgaactagcacatgtaattctgggtcgttactagggtaatctACGAAGTCTCCCTTAACCGAGCAGCGCATCATGATCTTTGGTGCTGGTACTGCTGGTGTTGGTATCGCCAACCAAATTGTCGCTGGTATGGTGACAGATGGCCTTTCATTAGGTAAGGCTAGAGATAATCTTTTCATGATTGATCGATGCGGTTTGCTTCTGGAGAGACATGCTAAGATTGCTACTGATGGACAAAAGCCATTTTTGAAGAAGGACTCAGACTTTAAGGAAGTCCCTTCTGGAGACATTGATTTAGAGACTGCTATTTCACTAATCAAACCCACTGTTCTTCTGGGGTGCTCCGGTCAACCTGGAAAATTTACAGAAAAGGCCATTCGTGAAATGAGCAAGCATGTCAAACATCCCATCATCTTCCCAATCTCTAACCCCACCACTCTCATGctttgattagtttttacggcttctgttccacggttcgacccggcgagctttactggcaacgtatacacccccctcgtaccGAAGCAAAGCCCGTTCAAATTGACGAATGGTCTAATGGTAAAGCTTTGATGGCAACTGGTTCTCCACTTCCTCCTCTCACACGTAATGGTAAAGAATATGTGATTTCTCAATGCAATAATGCTCTTCTTTACCCTGCTCTAGGTGTTGCATGTGTGTTATCCCGTTGCAAATTGTTGAGCggtttctgagcccctgccgatagagaccactgagatggtgatctcggaaacaacgaatac +>my_multi_exon_contig_2 
+tcgtggagcgccgaggcgaaggatctgaggtcaccggcgctagaccgtctccgaccgtacattggagcacgacatgacctaagttcgtgataaatccaacATGACTTTATGCGCAGGTTTTGACCCTAATCGATTTTTGCCCATCGTTCTCGACGTTGGCACCAACAATGAAACCCATCGTAAAAATCATCAATACATGGGTTTAAGAAAGGACCGTGTTCACGGTGAGCAGTATGACATCTTTTTGGAGAACGTTATTAAAGCCATTCGTGAAGTCTTTCCCGAGGCCTTTATTCACTTTGAGGATTTCGGACTTAAAAATGCCAAAAGGATTTTAGACCACTATCGTCCTAATATTGCCTGTTTTAACGATGATATCCAGGGCACCGGTGCTGTAGCACTGGCCGCCATTATAGGCGCCCTTCATGTTtatcctcctgaactagcacatgtaattctgggtcgttactagggtaatctGATGGAATGCTGAAAGCAGCTTCCGATGCTTTGGCCACTGTTCCTAGATCTTTGTTTGTCGCCGATGAAGCTCTCTTGCCAGATCTGGACAATGCTCGAGAAATCTCTCGTCACATCGTTTTTGCGGTTTTGAAGCAAGCCATTTCTGAAGGAATGAGCACAGTGGATTTACCCAAAGATGATGCCAAGTTGAAGGAATGGATTATTGAACGAGAATGGAACCCTGAATACAGGAATTTTGTAgtaggctgccccatctcttaggtatgtaaagccgccaaattaacggtccagaacgagctgtgggatggttcgtagtggcagatgaacggg \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.fasta Tue Aug 04 15:43:31 2020 -0400 @@ -0,0 +1,4 @@ +>identical_all_3_exons|my_multi_exon_contig_1|+|629|7.362e-187|3|100|1141|100[100]:429[429]:330[330]|480[480]:881[881]:402[402]|944[968]:1141[1141]:198[174] +ATGACTTTATGCGCAGGTTTTGACCCTAATCGATTTTTGCCCATCGTTCTCGACGTTGGCACCAACAATGAAACCCATCGTAAAAATCATCAATACATGGGTTTAAGAAAGGACCGTGTTCACGGTGAGCAGTATGACATCTTTTTGGAGAACGTTATTAAAGCCATTCGTGAAGTCTTTCCCGAGGCCTTTATTCACTTTGAGGATTTCGGACTTAAAAATGCCAAAAGGATTTTAGACCACTATCGTCCTAATATTGCCTGTTTTAACGATGATATCCAGGGCACCGGTGCTGTAGCACTGGCCGCCATTATAGGCGCCCTTCATGTTACGAAGTCTCCCTTAACCGAGCAGCGCATCATGATCTTTGGTGCTGGTACTGCTGGTGTTGGTATCGCCAACCAAATTGTCGCTGGTATGGTGACAGATGGCCTTTCATTAGGTAAGGCTAGAGATAATCTTTTCATGATTGATCGATGCGGTTTGCTTCTGGAGAGACATGCTAAGATTGCTACTGATGGACAAAAGCCATTTTTGAAGAAGGACTCAGACTTTAAGGAAGTCCCTTCTGGAGACATTGATTTAGAGACTGCTATTTCACTAATCAAACCCACTGTTCTTCTGGGGTGCTCCGGTCAACCTGGAAAATTTACAGAAAAGGCCATTCGTGAAATGAGCAAGCATGTCAAACATCCCATCATCTTCCCAATCTCTAACCCCACCACTCTCATGAAGCCCGTTCAAATTGACGAATGGTCTAATGGTAAAGCTTTGATGGCAACTGGTTCTCCACTTCCTCCTCTCACACGTAATGGTAAAGAATATGTGATTTCTCAATGCAATAATGCTCTTCTTTACCCTGCTCTAGGTGTTGCATGTGTGTTATCCCGTTGCAAATTGTTGAGC +>identical_all_2_exons|my_multi_exon_contig_2|+|402|1.588e-118|2|100|722|100[100]:429[429]:330[330]|480[480]:722[722]:243[243] +ATGACTTTATGCGCAGGTTTTGACCCTAATCGATTTTTGCCCATCGTTCTCGACGTTGGCACCAACAATGAAACCCATCGTAAAAATCATCAATACATGGGTTTAAGAAAGGACCGTGTTCACGGTGAGCAGTATGACATCTTTTTGGAGAACGTTATTAAAGCCATTCGTGAAGTCTTTCCCGAGGCCTTTATTCACTTTGAGGATTTCGGACTTAAAAATGCCAAAAGGATTTTAGACCACTATCGTCCTAATATTGCCTGTTTTAACGATGATATCCAGGGCACCGGTGCTGTAGCACTGGCCGCCATTATAGGCGCCCTTCATGTTGATGGAATGCTGAAAGCAGCTTCCGATGCTTTGGCCACTGTTCCTAGATCTTTGTTTGTCGCCGATGAAGCTCTCTTGCCAGATCTGGACAATGCTCGAGAAATCTCTCGTCACATCGTTTTTGCGGTTTTGAAGCAAGCCATTTCTGAAGGAATGAGCACAGTGGATTTACCCAAAGATGATGCCAAGTTGAAGGAATGGATTATTGAACGAGAATGGAACCCTGAATACAGGAATTTTGTA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/proteins.faa Tue Aug 04 15:43:31 2020 -0400 @@ -0,0 +1,10 @@ +>identical_all_3_exons +MTLCAGFDPNRFLPIVLDVGTNNETHRKNHQYMGLRKDRVHGEQYDIFLENVIKAIREVFPEAFIHFEDFGLKNAKRILDHYRPNIACFNDDIQGTGAVALAAIIGALHVTKSPLTEQRIMIFGAGTAGVGIANQIVAGMVTDGLSLGKARDNLFMIDRCGLLLERHAKIATDGQKPFLKKDSDFKEVPSGDIDLETAISLIKPTVLLGCSGQPGKFTEKAIREMSKHVKHPIIFPISNPTTLMEAKPVQIDEWSNGKALMATGSPLPPLTRNGKEYVISQCNNALLYPALGVACVLSRCKLLS +>identical_all_2_exons +MTLCAGFDPNRFLPIVLDVGTNNETHRKNHQYMGLRKDRVHGEQYDIFLENVIKAIREVFPEAFIHFEDFGLKNAKRILDHYRPNIACFNDDIQGTGAVALAAIIGALHVDGMLKAASDALATVPRSLFVADEALLPDLDNAREISRHIVFAVLKQAISEGMSTVDLPKDDAKLKEWIIEREWNPEYRNFV +>identical_exon1 +MTLCAGFDPNRFLPIVLDVGTNNETHRKNHQYMGLRKDRVHGEQYDIFLENVIKAIREVFPEAFIHFEDFGLKNAKRILDHYRPNIACFNDDIQGTGAVALAAIIGALHV +>identical_exon2 +TKSPLTEQRIMIFGAGTAGVGIANQIVAGMVTDGLSLGKARDNLFMIDRCGLLLERHAKIATDGQKPFLKKDSDFKEVPSGDIDLETAISLIKPTVLLGCSGQPGKFTEKAIREMSKHVKHPIIFPISNPTTLM +>identical_exon4 +DGMLKAASDALATVPRSLFVADEALLPDLDNAREISRHIVFAVLKQAISEGMSTVDLPKDDAKLKEWIIEREWNPEYRNFV \ No newline at end of file