Mercurial > repos > devteam > tabular_to_fasta

--- a/tabular_to_fasta.py	Mon May 19 12:34:07 2014 -0400
+++ b/tabular_to_fasta.py	Wed Feb 05 10:51:52 2020 -0500
@@ -4,65 +4,66 @@
 Output: fasta
 Return sequences whose lengths are within the range.
 """
-import sys, os
+import os
+import sys

-assert sys.version_info[:2] >= ( 2, 4 )

-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
+def stop_err(msg):
+    sys.exit(msg)
+

 def __main__():
     infile = sys.argv[1]
     title_col = sys.argv[2]
     seq_col = sys.argv[3]
-    outfile = sys.argv[4]
+    outfile = sys.argv[4]

     if title_col == None or title_col == 'None' or seq_col == None or seq_col == 'None':
-        stop_err( "Columns not specified." )
+        stop_err("Columns not specified.")
     try:
-        seq_col = int( seq_col ) - 1
+        seq_col = int(seq_col) - 1
     except:
-        stop_err( "Invalid Sequence Column: %s." %str( seq_col ) )
+        stop_err("Invalid Sequence Column: %s." % str(seq_col))

-    title_col_list = title_col.split( ',' )
-    out = open( outfile, 'w' )
+    title_col_list = title_col.split(',')
     skipped_lines = 0
     first_invalid_line = 0
     invalid_line = ""
     i = 0
-
-    for i, line in enumerate( open( infile ) ):
-        error = False
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ):
-            fields = line.split( '\t' )
-            fasta_title = []
-            for j in title_col_list:
-                try:
-                    j = int( j ) - 1
-                    fasta_title.append( fields[j] )
-                except:
-                    skipped_lines += 1
-                    if not invalid_line:
-                        first_invalid_line = i + 1
-                        invalid_line = line
-                    error = True
-                    break
-            if not error:
-                try:
-                    fasta_seq = fields[seq_col]
-                    if fasta_title[0].startswith( ">" ):
-                        fasta_title[0] = fasta_title[0][1:]
-                    print >> out, ">%s\n%s" % ( "_".join( fasta_title ), fasta_seq )
-                except:
-                    skipped_lines += 1
-                    if not invalid_line:
-                        first_invalid_line = i + 1
-                        invalid_line = line
-    out.close()
+
+    with open(outfile, 'w') as out:
+        for i, line in enumerate(open(infile)):
+            error = False
+            line = line.rstrip('\r\n')
+            if line and not line.startswith('#'):
+                fields = line.split('\t')
+                fasta_title = []
+                for j in title_col_list:
+                    try:
+                        j = int(j) - 1
+                        fasta_title.append(fields[j])
+                    except:
+                        skipped_lines += 1
+                        if not invalid_line:
+                            first_invalid_line = i + 1
+                            invalid_line = line
+                        error = True
+                        break
+                if not error:
+                    try:
+                        fasta_seq = fields[seq_col]
+                        if fasta_title[0].startswith(">"):
+                            fasta_title[0] = fasta_title[0][1:]
+                        print(">%s\n%s" % ("_".join(fasta_title), fasta_seq), file=out)
+                    except:
+                        skipped_lines += 1
+                        if not invalid_line:
+                            first_invalid_line = i + 1
+                            invalid_line = line

     if skipped_lines > 0:
-        print 'Data issue: skipped %d blank or invalid lines starting at #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
+        print('Data issue: skipped %d blank or invalid lines starting at #%d: "%s"' % (skipped_lines, first_invalid_line, invalid_line))
+

-if __name__ == "__main__" : __main__()
\ No newline at end of file
+if __name__ == "__main__":
+    __main__()
--- a/tabular_to_fasta.xml	Mon May 19 12:34:07 2014 -0400
+++ b/tabular_to_fasta.xml	Wed Feb 05 10:51:52 2020 -0500
@@ -1,43 +1,50 @@
-<tool id="tab2fasta" name="Tabular-to-FASTA" version="1.1.0">
-	<description>converts tabular file to FASTA format</description>
-	<command interpreter="python">tabular_to_fasta.py $input $title_col $seq_col $output </command>
-	<inputs>
-		<param name="input" type="data" format="tabular" label="Tab-delimited file"/>
-		<param name="title_col" type="data_column" data_ref="input" multiple="True" numerical="False" label="Title column(s)" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"/>
-		<param name="seq_col" type="data_column" data_ref="input" numerical="False" label="Sequence column" />
-	</inputs>
-	<outputs>
-		<data name="output" format="fasta"/>
-	</outputs>
-	<tests>
-		<test>
-			<param name="input" value="solexa.tabular" />
-			<param name="title_col" value="1,2,3,4" />
-			<param name="seq_col" value="5" />
-			<output name="output" file="tabular_to_fasta_out1.fasta" />
-		</test>
-	</tests>
-	<help>
-
+<tool id="tab2fasta" name="Tabular-to-FASTA" version="1.1.1" profile="16.04">
+    <description>converts tabular file to FASTA format</description>
+    <requirements>
+        <requirement type="package" version="3.7">python</requirement>
+    </requirements>
+    <command><![CDATA[
+python '$__tool_directory__/tabular_to_fasta.py'
+'$input'
+$title_col
+$seq_col
+'$output'
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="tabular" label="Tab-delimited file"/>
+        <param name="title_col" type="data_column" data_ref="input" multiple="true" numerical="false" label="Title column(s)" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"/>
+        <param name="seq_col" type="data_column" data_ref="input" numerical="false" label="Sequence column" />
+    </inputs>
+    <outputs>
+        <data name="output" format="fasta"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="solexa.tabular" />
+            <param name="title_col" value="1,2,3,4" />
+            <param name="seq_col" value="5" />
+            <output name="output" file="tabular_to_fasta_out1.fasta" />
+        </test>
+    </tests>
+    <help><![CDATA[
 **What it does**

 Converts tab delimited data into FASTA formatted sequences.

 -----------
-
+
 **Example**

 Suppose this is a sequence file produced by Illumina (Solexa) sequencer::

-	5	300	902	419	GACTCATGATTTCTTACCTATTAGTGGTTGAACATC
-	5	300	880	431	GTGATATGTATGTTGACGGCCATAAGGCTGCTTCTT
-
+    5	300	902	419	GACTCATGATTTCTTACCTATTAGTGGTTGAACATC
+    5	300	880	431	GTGATATGTATGTTGACGGCCATAAGGCTGCTTCTT
+
 Selecting **c3** and **c4** as the **Title column(s)** and **c5** as the **Sequence column** will result in::

-	&gt;902_419
-	GACTCATGATTTCTTACCTATTAGTGGTTGAACATC
-	&gt;880_431
-	GTGATATGTATGTTGACGGCCATAAGGCTGCTTCTT
-
-	</help>
-</tool>
\ No newline at end of file
+    >902_419
+    GACTCATGATTTCTTACCTATTAGTGGTTGAACATC
+    >880_431
+    GTGATATGTATGTTGACGGCCATAAGGCTGCTTCTT
+    ]]></help>
+</tool>