changeset 0:de3a45e9ccce draft

"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/emboss commit eaefd81cf7afec08ffc570ac53d6718d62743a99"
author brinkmanlab
date Tue, 25 Jan 2022 03:26:28 +0000
parents
children 40774082f93e
files emboss_format_corrector.py emboss_fuzznuc.xml macros.xml test-data/2.fasta test-data/emboss_fuzznuc_out.tabular test-data/emboss_fuzznuc_out2.tabular
diffstat 6 files changed, 288 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/emboss_format_corrector.py	Tue Jan 25 03:26:28 2022 +0000
@@ -0,0 +1,47 @@
+# EMBOSS format corrector
+import operator
+
+
+# Hook referenced by the tool's <code file="..."/> element: sets each output dataset's datatype from the tool parameters before the job runs.
+def exec_before_job(app, inp_data=None, out_data=None, tool=None, param_dict=None):  # NOTE(review): inp_data and tool are unused; out_data and param_dict default to None but are dereferenced unconditionally below
+    # Sort the outputs by name so the 1-based counter below pairs each output with a stable position.
+    items = out_data.items()
+    items = sorted(items, key=operator.itemgetter(0))
+
+    # Pass 1: datatype chosen through the 'out_format<N>' parameter, where N is the 1-based position of the output.
+    data_count = 1
+    for name, data in items:  # name is not used inside the loop
+        outputType = param_dict.get('out_format' + str(data_count), None)
+        if outputType is not None:  # outputs without a matching parameter are left as declared
+            if outputType == 'ncbi':  # format names that differ from the datatype name are translated first
+                outputType = "fasta"
+            elif outputType == 'excel':
+                outputType = "tabular"
+            elif outputType == 'text':
+                outputType = "txt"
+            data = app.datatypes_registry.change_datatype(data, outputType)  # any other value is passed through unchanged as the datatype name
+            app.model.context.add(data)  # register the changed dataset with the model context ...
+            app.model.context.flush()  # ... and flush once per changed output
+        data_count += 1  # advances even when no parameter matched, keeping position and output aligned
+
+    # Pass 2: outputs whose 'html_out<N>' parameter equals "yes" are switched to html.
+    data_count = 1
+    for name, data in items:
+        wants_plot = param_dict.get('html_out' + str(data_count), None)  # variable name is shared with the png pass; here it holds the html_out flag
+        ext = "html"
+        if wants_plot == "yes":
+            data = app.datatypes_registry.change_datatype(data, ext)
+            app.model.context.add(data)
+            app.model.context.flush()
+        data_count += 1
+
+    # Pass 3: outputs whose 'plot<N>' parameter equals "yes" are switched to png.
+    data_count = 1
+    for name, data in items:
+        wants_plot = param_dict.get('plot' + str(data_count), None)
+        ext = "png"
+        if wants_plot == "yes":  # runs after the two passes above, so png wins if several parameters apply to one output
+            data = app.datatypes_registry.change_datatype(data, ext)
+            app.model.context.add(data)
+            app.model.context.flush()
+        data_count += 1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/emboss_fuzznuc.xml	Tue Jan 25 03:26:28 2022 +0000
@@ -0,0 +1,133 @@
+<tool id="EMBOSS: fuzznuc37" name="fuzznuc" version="6.6.0">
+  <description>Nucleic acid pattern search</description>
+  <expand macro="bio_tools" /> <!-- macro comes from macros.xml, which is imported by the macros element just below -->
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements" />
+  <code file="emboss_format_corrector.py" /> <!-- provides exec_before_job, which retypes out_file1 from the out_format1 selection -->
+  <command><![CDATA[
+    fuzznuc
+
+    -sequence '$input1'
+    $seq_qualifiers.sreverse1 $seq_qualifiers.snucleotide1 $seq_qualifiers.sprotein1 $seq_qualifiers.slower1 $seq_qualifiers.supper1 $seq_qualifiers.scircular1
+    #if $seq_qualifiers.sbegin1
+      -sbegin1 '$seq_qualifiers.sbegin1'
+    #end if
+    #if $seq_qualifiers.send1
+      -send1 '$seq_qualifiers.send1'
+    #end if
+    #if $seq_qualifiers.ioffset1
+      -ioffset1 '$seq_qualifiers.ioffset1'
+    #end if
+
+    -outfile '$out_file1'
+    $out_qualifiers.raccshow2 $out_qualifiers.rdesshow2 $out_qualifiers.rscoreshow2 $out_qualifiers.rusashow2 $out_qualifiers.rstrandshow2
+    #if $out_qualifiers.rmaxall2
+      -rmaxall2 '$out_qualifiers.rmaxall2'
+    #end if
+    #if $out_qualifiers.rmaxseq2
+      -rmaxseq2 '$out_qualifiers.rmaxseq2'
+    #end if
+
+    -pattern '$pattern' -pmismatch '$mismatch' -rformat2 '$out_format1' -auto
+
+    -complement '$complement'
+  ]]></command>
+  <inputs>
+    <param name="input1" type="data" format="fasta" label="Sequences" />
+    <param name="pattern" type="text" value="" label="Search pattern">
+      <sanitizer> <!-- drops single quotes so the value cannot close the single-quoted pattern argument in the command -->
+        <valid initial="string.printable">
+         <remove value="&apos;"/>
+        </valid>
+        <mapping initial="none">
+          <add source="&apos;" target=""/>
+        </mapping>
+      </sanitizer>
+    </param>
+    <param name="mismatch" type="integer" value="0" label="Number of mismatches" />
+    <param name="complement" type="select" label="Search complementary strand">
+      <option value="no">No</option>
+      <option value="yes">Yes</option>
+    </param>
+    <param name="out_format1" type="select" label="Output report file format"> <!-- value is passed to rformat2 and also read by emboss_format_corrector.py -->
+      <option value="seqtable">SeqTable</option>
+      <option value="embl">EMBL</option>
+      <option value="genbank">GENBANK</option>
+      <option value="gff">GFF</option>
+      <option value="pir">PIR</option>
+      <option value="swiss">SwissProt</option>
+      <option value="dbmotif">DbMotif</option>
+      <option value="diffseq">Diffseq</option>
+      <option value="excel">Excel (tab delimited)</option> <!-- emboss_format_corrector.py maps this value to the tabular datatype -->
+      <option value="feattable">FeatTable</option>
+      <option value="motif">Motif</option>
+      <option value="regions">Regions</option>
+      <option value="simple">SRS Simple</option>
+      <option value="fuzznuc">Fuzznuc Output File</option>
+      <option value="srs">SRS</option>
+      <option value="table">Table</option>
+      <option value="tagseq">TagSeq</option>
+    </param>
+
+    <section name="seq_qualifiers" title="Sequence associated qualifiers"> <!-- boolean truevalues below are emitted verbatim into the command line -->
+      <param type="integer" argument="-sbegin1" optional="true" label="Start of each sequence to be used"/>
+      <param type="integer" argument="-send1" optional="true" label="End of each sequence to be used"/>
+      <param type="integer" argument="-ioffset1" optional="true" label="Input start position offset"/>
+      <param type="boolean" argument="-sreverse1" truevalue="-sreverse1 Y" falsevalue="" label="Reverse (if DNA)"/>
+      <param type="boolean" argument="-snucleotide1" truevalue="-snucleotide1 Y" falsevalue="" label="Sequence is nucleotide"/>
+      <param type="boolean" argument="-sprotein1" truevalue="-sprotein1 Y" falsevalue="" label="Sequence is protein"/>
+      <param type="boolean" argument="-slower1" truevalue="-slower1 Y" falsevalue="" label="Make lower case"/>
+      <param type="boolean" argument="-supper1" truevalue="-supper1 Y" falsevalue="" label="Make upper case"/>
+      <param type="boolean" argument="-scircular1" truevalue="-scircular1 Y" falsevalue="" label="Sequence is circular"/>
+    </section>
+
+    <section name="out_qualifiers" title="Outfile associated qualifiers">
+      <param type="boolean" argument="-raccshow2" truevalue="-raccshow2 Y" falsevalue="" label="Show accession number in the report"/>
+      <param type="boolean" argument="-rdesshow2" truevalue="-rdesshow2 Y" falsevalue="" label="Show description in the report"/>
+      <param type="boolean" argument="-rscoreshow2" truevalue="-rscoreshow2 Y" falsevalue="" label="Show the score in the report"/>
+      <param type="boolean" argument="-rstrandshow2" truevalue="-rstrandshow2 Y" falsevalue="" label="Show the nucleotide strand in the report"/>
+      <param type="boolean" argument="-rusashow2" truevalue="-rusashow2 Y" falsevalue="" label="Show the full USA in the report"/>
+      <param type="integer" argument="-rmaxall2" optional="true" label="Maximum total hits to report"/>
+      <param type="integer" argument="-rmaxseq2" optional="true" label="Maximum hits to report for one sequence"/>
+    </section>
+
+  </inputs>
+  <outputs>
+    <data name="out_file1" format="fuzznuc" /> <!-- declared format only; exec_before_job changes the datatype to follow out_format1 -->
+  </outputs>
+  <tests>
+    <test> <!-- default qualifiers, tab delimited report -->
+      <param name="input1" value="2.fasta"/>
+      <param name="pattern" value="AA"/>
+      <param name="mismatch" value="0"/>
+      <param name="complement" value="no"/>
+      <param name="out_format1" value="excel"/>
+      <output name="out_file1" file="emboss_fuzznuc_out.tabular"/>
+    </test>
+    <test> <!-- section parameters: upper case, start at position 100, at most 5 hits per sequence -->
+      <param name="input1" value="2.fasta"/>
+      <param name="pattern" value="AA"/>
+      <param name="mismatch" value="0"/>
+      <param name="complement" value="no"/>
+      <param name="out_format1" value="excel"/>
+      <param name="seq_qualifiers|supper1" value="true"/>
+      <param name="seq_qualifiers|sbegin1" value="100"/>
+      <param name="out_qualifiers|rmaxseq2" value="5"/>
+      <output name="out_file1" file="emboss_fuzznuc_out2.tabular"/>
+    </test>
+  </tests>
+  <help>
+.. class:: warningmark
+
+The input dataset needs to be sequences.
+
+-----
+
+    You can view the original documentation here_.
+
+    .. _here: http://galaxy-iuc.github.io/emboss-5.0-docs/fuzznuc.html
+  </help>
+  <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Jan 25 03:26:28 2022 +0000
@@ -0,0 +1,50 @@
+<macros>
+    <token name="@VERSION@">6.6.0</token> <!-- substituted into the emboss requirement version below -->
+    <xml name="requirements"> <!-- package dependencies: emboss at @VERSION@ and perl 5.26 -->
+        <requirements>
+            <requirement type="package" version="@VERSION@">emboss</requirement>
+            <requirement type="package" version="5.26">perl</requirement>
+        </requirements>
+    </xml>
+    <xml name="stdio"> <!-- error detection rules; not expanded by emboss_fuzznuc.xml in this changeset -->
+        <stdio>
+            <regex level="fatal_oom" match="insufficient memory available" source="both" />
+            <exit_code range="1:" />
+        </stdio>
+    </xml>
+    <xml name="bio_tools"> <!-- cross reference to the bio.tools entry named emboss -->
+        <xrefs>
+            <xref type="bio.tools">emboss</xref>
+        </xrefs>
+    </xml>
+    <xml name="citations"> <!-- two DOI citations shared by every tool that expands this macro -->
+        <citations>
+            <citation type="doi">10.1016/S0168-9525(00)02024-2</citation>
+            <citation type="doi">10.1101/gr.5578007</citation>
+            <yield /> <!-- presumably the slot for extra citation elements supplied at the expand site; confirm against the Galaxy macro docs -->
+        </citations>
+    </xml>
+    <xml name="regex_sanitizer"> <!-- whitelist for regex style text parameters: ASCII letters, digits and the characters added below; not expanded by emboss_fuzznuc.xml in this changeset -->
+        <sanitizer>
+            <valid initial="string.ascii_letters,string.digits">
+                <add value="^"/>
+                <add value="$"/>
+                <add value="("/>
+                <add value=")"/>
+                <add value="|"/>
+                <add value="?"/>
+                <add value="*"/>
+                <add value="+"/>
+                <add value="{"/>
+                <add value="}"/>
+                <add value="\"/>
+                <add value="["/>
+                <add value="]"/>
+                <add value="."/>
+                <add value=","/>
+            </valid>
+        </sanitizer>
+        <validator type="empty_field" />
+        <validator type="regex" message="Pattern must not end with backslash.">.*[^\\]$</validator>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2.fasta	Tue Jan 25 03:26:28 2022 +0000
@@ -0,0 +1,11 @@
+>Sequence 561 BP; 135 A; 106 C; 98 G; 222 T; 0 other;
+gttcgatgcc taaaatacct tcttttgtcc ctacacagac cacagttttc ctaatggctt
+tacaccgact agaaattctt gtgcaagcac taattgaaag cggttggcct agagtgttac
+cggtttgtat agctgagcgc gtctcttgcc ctgatcaaag gttcattttc tctactttgg
+aagacgttgt ggaagaatac aacaagtacg agtctctccc ccctggtttg ctgattactg
+gatacagttg taataccctt cgcaacaccg cgtaactatc tatatgaatt attttccctt
+tattatatgt agtaggttcg tctttaatct tcctttagca agtcttttac tgttttcgac
+ctcaatgttc atgttcttag gttgttttgg ataatatgcg gtcagtttaa tcttcgttgt
+ttcttcttaa aatatttatt catggtttaa tttttggttt gtacttgttc aggggccagt
+tcattattta ctctgtttgt atacagcagt tcttttattt ttagtatgat tttaatttaa
+aacaattcta atggtcaaaa a
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/emboss_fuzznuc_out.tabular	Tue Jan 25 03:26:28 2022 +0000
@@ -0,0 +1,41 @@
+SeqName	Start	End	Score	Strand	Pattern_name	Mismatch
+Sequence	12	13	2	+	pattern1	.
+Sequence	13	14	2	+	pattern1	.
+Sequence	14	15	2	+	pattern1	.
+Sequence	53	54	2	+	pattern1	.
+Sequence	73	74	2	+	pattern1	.
+Sequence	74	75	2	+	pattern1	.
+Sequence	85	86	2	+	pattern1	.
+Sequence	92	93	2	+	pattern1	.
+Sequence	97	98	2	+	pattern1	.
+Sequence	98	99	2	+	pattern1	.
+Sequence	157	158	2	+	pattern1	.
+Sequence	158	159	2	+	pattern1	.
+Sequence	181	182	2	+	pattern1	.
+Sequence	193	194	2	+	pattern1	.
+Sequence	196	197	2	+	pattern1	.
+Sequence	201	202	2	+	pattern1	.
+Sequence	204	205	2	+	pattern1	.
+Sequence	252	253	2	+	pattern1	.
+Sequence	264	265	2	+	pattern1	.
+Sequence	274	275	2	+	pattern1	.
+Sequence	287	288	2	+	pattern1	.
+Sequence	326	327	2	+	pattern1	.
+Sequence	340	341	2	+	pattern1	.
+Sequence	364	365	2	+	pattern1	.
+Sequence	393	394	2	+	pattern1	.
+Sequence	409	410	2	+	pattern1	.
+Sequence	429	430	2	+	pattern1	.
+Sequence	430	431	2	+	pattern1	.
+Sequence	431	432	2	+	pattern1	.
+Sequence	449	450	2	+	pattern1	.
+Sequence	534	535	2	+	pattern1	.
+Sequence	539	540	2	+	pattern1	.
+Sequence	540	541	2	+	pattern1	.
+Sequence	541	542	2	+	pattern1	.
+Sequence	544	545	2	+	pattern1	.
+Sequence	550	551	2	+	pattern1	.
+Sequence	557	558	2	+	pattern1	.
+Sequence	558	559	2	+	pattern1	.
+Sequence	559	560	2	+	pattern1	.
+Sequence	560	561	2	+	pattern1	.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/emboss_fuzznuc_out2.tabular	Tue Jan 25 03:26:28 2022 +0000
@@ -0,0 +1,6 @@
+SeqName	Start	End	Score	Strand	Pattern_name	Mismatch
+Sequence	157	158	2	+	pattern1	.
+Sequence	158	159	2	+	pattern1	.
+Sequence	181	182	2	+	pattern1	.
+Sequence	193	194	2	+	pattern1	.
+Sequence	196	197	2	+	pattern1	.