changeset 2:091edad7622f draft

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author devteam
date Sun, 01 Mar 2020 07:25:01 -0500 (2020-03-01)
parents 7e801ab2b70e
children e7ed3c310b74
files fasta_to_tabular.py fasta_to_tabular.xml
diffstat 2 files changed, 99 insertions(+), 95 deletions(-) [+]
line wrap: on
line diff
--- a/fasta_to_tabular.py	Wed Nov 11 12:14:09 2015 -0500
+++ b/fasta_to_tabular.py	Sun Mar 01 07:25:01 2020 -0500
@@ -6,52 +6,53 @@
 format convert: fasta to tabular
 """
 
-import sys, os
+import sys
+
 
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
+def stop_err(msg):
+    sys.exit(msg)
+
 
 def __main__():
     if len(sys.argv) != 5:
         stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)")
     infile = sys.argv[1]
     outfile = sys.argv[2]
-    keep_first = int( sys.argv[3] )
-    descr_split = int( sys.argv[4] )
-    fasta_title = fasta_seq = ''
+    keep_first = int(sys.argv[3])
+    descr_split = int(sys.argv[4])
     if keep_first == 0:
         keep_first = None
     elif descr_split == 1:
-        #Added one for the ">" character
-        #(which is removed if using descr_split > 1)
+        # Added one for the ">" character
+        # (which is removed if using descr_split > 1)
         keep_first += 1
     if descr_split < 1:
         stop_err("Bad description split value (should be 1 or more)")
-    out = open( outfile, 'w' )
-    for i, line in enumerate( open( infile ) ):
-        line = line.rstrip( '\r\n' )
-        if not line or line.startswith( '#' ):
-            continue
-        if line.startswith( '>' ):
-            #Don't want any existing tabs to trigger extra columns:
-            line = line.replace('\t', ' ')
-            if i > 0:
-                out.write('\n')
-            if descr_split == 1:
-                out.write(line[1:keep_first])
+    with open(outfile, 'w') as out, open(infile) as in_fh:
+        for i, line in enumerate(in_fh):
+            line = line.rstrip('\r\n')
+            if not line or line.startswith('#'):
+                continue
+            if line.startswith('>'):
+                # Don't want any existing tabs to trigger extra columns:
+                line = line.replace('\t', ' ')
+                if i > 0:
+                    out.write('\n')
+                if descr_split == 1:
+                    out.write(line[1:keep_first])
+                else:
+                    words = line[1:].split(None, descr_split - 1)
+                    # apply any truncation to first word (the id)
+                    words[0] = words[0][0:keep_first]
+                    # pad with empty columns if required
+                    words += [""] * (descr_split - len(words))
+                    out.write("\t".join(words))
+                out.write('\t')
             else:
-                words = line[1:].split(None, descr_split-1)
-                #apply any truncation to first word (the id)
-                words[0] = words[0][0:keep_first]
-                #pad with empty columns if required
-                words += [""]*(descr_split-len(words))
-                out.write("\t".join(words))
-            out.write('\t')
-        else:
-            out.write(line)
-    if i > 0:
-        out.write('\n')
-    out.close()
+                out.write(line)
+        if i > 0:
+            out.write('\n')
 
-if __name__ == "__main__" : __main__()
+
+if __name__ == "__main__":
+    __main__()
--- a/fasta_to_tabular.xml	Wed Nov 11 12:14:09 2015 -0500
+++ b/fasta_to_tabular.xml	Sun Mar 01 07:25:01 2020 -0500
@@ -1,64 +1,67 @@
-<tool id="fasta2tab" name="FASTA-to-Tabular" version="1.1.0">
-	<description>converter</description>
-	<command interpreter="python">fasta_to_tabular.py $input $output $keep_first $descr_columns</command>
-	<inputs>
-		<param name="input" type="data" format="fasta" label="Convert these sequences"/>
-		<param name="descr_columns" type="integer" value="1" label="How many columns to divide title string into?" help="Typically 2 to take the ID (first word) and decription (rest) as two columns, or 1 to give a single column">
-			<validator type="in_range" min="1" />
-		</param>
-		<param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="Applies only to the first column taken from the title string ('0' = keep the whole thing), useful when your sequence identifiers are all the same length.">
-			<validator type="in_range" min="0" />
-		</param>
-	</inputs>
-	<outputs>
-		<data name="output" format="tabular"/>
-	</outputs>
-	<tests>
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="descr_columns" value="1"/>
-			<param name="keep_first" value="0"/>
-			<output name="output" file="fasta_to_tabular_out1.tabular" />
-		</test>
-		
-		<test>
-			<param name="input" value="4.fasta" />
-			<param name="descr_columns" value="1"/>
-			<param name="keep_first" value="0"/>
-			<output name="output" file="fasta_to_tabular_out2.tabular" />
-		</test>
-		
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="descr_columns" value="1"/>
-			<param name="keep_first" value="14"/>
-			<output name="output" file="fasta_to_tabular_out3.tabular" />
-		</test>
+<tool id="fasta2tab" name="FASTA-to-Tabular" version="1.1.1" profile="16.04">
+    <description>converter</description>
+    <requirements>
+        <requirement type="package" version="3.7">python</requirement>
+    </requirements>
+    <command>
+python '$__tool_directory__/fasta_to_tabular.py' '$input' '$output' $keep_first $descr_columns
+    </command>
+    <inputs>
+        <param name="input" type="data" format="fasta" label="Convert these sequences"/>
+        <param name="descr_columns" type="integer" value="1" min="1" label="How many columns to divide title string into?" help="Typically 2 to take the ID (first word) and decription (rest) as two columns, or 1 to give a single column">
+        </param>
+        <param name="keep_first" type="integer" value="0" min="0" label="How many title characters to keep?" help="Applies only to the first column taken from the title string ('0' = keep the whole thing), useful when your sequence identifiers are all the same length.">
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="454.fasta" />
+            <param name="descr_columns" value="1"/>
+            <param name="keep_first" value="0"/>
+            <output name="output" file="fasta_to_tabular_out1.tabular" />
+        </test>
 
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="descr_columns" value="2"/>
-			<param name="keep_first" value="0"/>
-			<output name="output" file="fasta_to_tabular_out4.tabular" />
-		</test>
+        <test>
+            <param name="input" value="4.fasta" />
+            <param name="descr_columns" value="1"/>
+            <param name="keep_first" value="0"/>
+            <output name="output" file="fasta_to_tabular_out2.tabular" />
+        </test>
+
+        <test>
+            <param name="input" value="454.fasta" />
+            <param name="descr_columns" value="1"/>
+            <param name="keep_first" value="14"/>
+            <output name="output" file="fasta_to_tabular_out3.tabular" />
+        </test>
 
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="descr_columns" value="5"/>
-			<param name="keep_first" value="0"/>
-			<output name="output" file="fasta_to_tabular_out5.tabular" />
-		</test>
+        <test>
+            <param name="input" value="454.fasta" />
+            <param name="descr_columns" value="2"/>
+            <param name="keep_first" value="0"/>
+            <output name="output" file="fasta_to_tabular_out4.tabular" />
+        </test>
 
-		<test>
-			<param name="input" value="454.fasta" />
-			<param name="descr_columns" value="5"/>
-			<param name="keep_first" value="10"/>
-			<output name="output" file="fasta_to_tabular_out6.tabular" />
-		</test>
+        <test>
+            <param name="input" value="454.fasta" />
+            <param name="descr_columns" value="5"/>
+            <param name="keep_first" value="0"/>
+            <output name="output" file="fasta_to_tabular_out5.tabular" />
+        </test>
 
-	</tests>
-	<help>
-	
+        <test>
+            <param name="input" value="454.fasta" />
+            <param name="descr_columns" value="5"/>
+            <param name="keep_first" value="10"/>
+            <output name="output" file="fasta_to_tabular_out6.tabular" />
+        </test>
+
+    </tests>
+    <help><![CDATA[
+
 **What it does**
 
 This tool converts FASTA formatted sequences to TAB-delimited format.
@@ -70,16 +73,16 @@
 The option *How many characters to keep?* allows to select a specified number of letters from the beginning of each FASTA entry.
 With the introduction of the **How many columns to divide title string into?** option this setting is of limited use, but does still allow you to truncate the identifier.
 
------	
+-----
 
 **Example**
 
 Suppose you have the following FASTA formatted sequences from a Roche (454) FLX sequencing run::
 
-    &gt;EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_
+    >EYKX4VC02EQLO5 length=108 xy=1826_0455 region=2 run=R_2007_11_07_16_15_57_
     TCCGCGCCGAGCATGCCCATCTTGGATTCCGGCGCGATGACCATCGCCCGCTCCACCACG
     TTCGGCCGGCCCTTCTCGTCGAGGAATGACACCAGCGCTTCGCCCACG
-    &gt;EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_
+    >EYKX4VC02D4GS2 length=60 xy=1573_3972 region=2 run=R_2007_11_07_16_15_57_
     AATAAAACTAAATCAGCAAAGACTGGCAAATACTCACAGGCTTATACAATACAAATGTAA
 
 Running this tool with the default settings will produce this (2 column output):
@@ -124,5 +127,5 @@
 
 Note the sequences have been truncated for display purposes in the above tables.
 
-	</help>
+    ]]></help>
 </tool>