changeset 3:36388b666bfc draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_fasta_tabular_converter commit b6de14061c479f0418cd89e26d6f5ac26e565a07
author drosofff
date Wed, 09 Nov 2016 11:24:13 -0500
parents 330dd8a8c31a
children
files fasta_tabular_converter.py fasta_tabular_converter.xml
diffstat 2 files changed, 81 insertions(+), 84 deletions(-) [+]
line wrap: on
line diff
--- a/fasta_tabular_converter.py	Fri Mar 25 19:29:40 2016 -0400
+++ b/fasta_tabular_converter.py	Wed Nov 09 11:24:13 2016 -0500
@@ -1,10 +1,11 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 #
+import argparse
+import logging
 import sys
-import string
-import argparse
 from collections import defaultdict
 
+
 def Parser():
     the_parser = argparse.ArgumentParser()
     the_parser.add_argument(
@@ -16,95 +17,90 @@
     args = the_parser.parse_args()
     return args
 
+
 def readfasta_writetabular(fasta, tabular, mode="oneline"):
-    F = open(fasta, "r")
-    for line in F:
+    for line in fasta:
         if line[0] == ">":
             try:
-                seqdic["".join(stringlist)] += 1 # to dump the sequence of the previous item - try because of first missing stringlist variable
-            except: pass
-            stringlist=[]
+                seqdic["".join(stringlist)] += 1  # to dump the sequence of the previous item - try because of first missing stringlist variable
+            except NameError:
+                pass
+            stringlist = []
         else:
-            stringlist.append(line[:-1])
+            try:
+                stringlist.append(line[:-1])
+            except UnboundLocalError:  # if file went through filter and contains only empty lines
+                logging.info("first line is empty.")
     try:
-        seqdic["".join(stringlist)] +=  1 # for the last sequence
-    except: pass # in case file to convert is empty
-    F.close()
-    F = open(tabular, "w")
+        seqdic["".join(stringlist)] += 1  # for the last sequence
+    except NameError:
+        logging.info("input file has not fasta sequences.")
     for seq in sorted(seqdic, key=seqdic.get, reverse=True):
-        print >> F, "%s\t%s" % (seq, seqdic[seq])
-    F.close()
-    
-        
+        tabular.write("%s\t%s\n" % (seq, seqdic[seq]))
+
+
 def readtabular_writefasta(tabular, fasta):
-  F = open(tabular, "r")
-  Fw = open(fasta, "w")
-  counter = 0
-  for line in F:
-    fields = line.split()
-    for i in range(int(fields[1])):
-      counter += 1
-      print >> Fw, ">%s\n%s" % (counter, fields[0])
-  F.close()
-  Fw.close()
+    counter = 0
+    for line in tabular:
+        fields = line.split()
+        for i in range(int(fields[1])):
+            counter += 1
+            fasta.write(">%s\n%s\n" % (counter, fields[0]))
+
 
-def readtabular_writefastaweighted (tabular, fasta):
-  F = open(tabular, "r")
-  Fw = open(fasta, "w")
-  counter = 0
-  for line in F:
-    counter += 1
-    fields = line[:-1].split()
-    print >> Fw, ">%s_%s\n%s" % (counter, fields[1],  fields[0])
-  F.close()
-  Fw.close()
+def readtabular_writefastaweighted(tabular, fasta):
+    counter = 0
+    for line in tabular:
+        counter += 1
+        fields = line[:-1].split()
+        fasta.write(">%s_%s\n%s\n" % (counter, fields[1],  fields[0]))
+
 
-def readfastaeighted_writefastaweighted(fastaweigthed_input, fastaweigthed_reparsed):
-  F = open(fastaweigthed_input, "r")
-  number_reads = 0
-  for line in F:
-    if line[0] == ">":
-      weigth = int(line[1:-1].split("_")[-1])
-      number_reads += weigth
-    else:
-      seqdic[line[:-1]] += weigth
-  F.close()
-  F = open(fastaweigthed_reparsed, "w")
-  n=0
-  for seq in sorted(seqdic, key=seqdic.get, reverse=True):
-    n += 1
-    print >> F, ">%s_%s\n%s" % (n, seqdic[seq], seq)
-  F.close()
-  print "%s reads collapsed" % number_reads
+def readfastaweighted_writefastaweighted(fastaweigthed_input, fastaweigthed_reparsed):
+    number_reads = 0
+    for line in fastaweigthed_input:
+        if line[0] == ">":
+            weigth = int(line[1:-1].split("_")[-1])
+            number_reads += weigth
+        else:
+            seqdic[line[:-1]] += weigth
+    n = 0
+    for seq in sorted(seqdic, key=seqdic.get, reverse=True):
+        n += 1
+        fastaweigthed_reparsed.write(">%s_%s\n%s\n" % (n, seqdic[seq], seq))
+    log.info("%s reads collapsed" % number_reads)
 
-def readfastaeighted_writefasta(fastaweigthed, fasta):
-  F = open(fastaweigthed, "r")
-  Fw = open(fasta, "w")
-  counter = 0
-  for line in F:
-    if line[0] == ">":
-      weigth = int(line[1:-1].split("_")[-1])
-    else:
-      seq = line[:-1]
-      for i in range (weigth):
-        counter += 1
-        print >> Fw, ">%s\n%s" % (counter, seq)
-  F.close()
-  Fw.close()
+
+def readfastaweighted_writefasta(fastaweigthed, fasta):
+    counter = 0
+    for line in fastaweigthed:
+        if line[0] == ">":
+            weigth = int(line[1:-1].split("_")[-1])
+        else:
+            seq = line[:-1]
+            for i in range(weigth):
+                counter += 1
+                fasta.write(">%s\n%s\n" % (counter, seq))
+
 
 def main(input, output, type):
-    if type == "fasta2tabular":
-        readfasta_writetabular(input, output)
-    elif type == "tabular2fasta":
-        readtabular_writefasta(input, output)
-    elif type == "tabular2fastaweight":
-        readtabular_writefastaweighted (input, output)
-    elif type == "fastaweight2fastaweight":
-        readfastaeighted_writefastaweighted(input, output)
-    elif type == "fastaweight2fasta":
-        readfastaeighted_writefasta(input, output)
+    with open(input, "r") as input:
+        with open(output, "w") as output:
+            if type == "fasta2tabular":
+                readfasta_writetabular(input, output)
+            elif type == "tabular2fasta":
+                readtabular_writefasta(input, output)
+            elif type == "tabular2fastaweight":
+                readtabular_writefastaweighted(input, output)
+            elif type == "fastaweight2fastaweight":
+                readfastaweighted_writefastaweighted(input, output)
+            elif type == "fastaweight2fasta":
+                readfastaweighted_writefasta(input, output)
+
 
 if __name__ == "__main__":
     seqdic = defaultdict(int)
     args = Parser()
-    main (args.input, args.output, args.type)
+    log = logging.getLogger(__name__)
+    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+    main(args.input, args.output, args.type)
--- a/fasta_tabular_converter.xml	Fri Mar 25 19:29:40 2016 -0400
+++ b/fasta_tabular_converter.xml	Wed Nov 09 11:24:13 2016 -0500
@@ -1,10 +1,11 @@
 <tool id="fasta_tabular_converter" name="fasta - tabular" version="1.1.0">
   <description>conversions</description>
-  <command interpreter="python">fasta_tabular_converter.py
-                                          --input $input
-                                          --output $output
-                                          --type $switch.conversionType
-  </command>
+  <command><![CDATA[
+      python '$__tool_directory__'/fasta_tabular_converter.py
+      --input '$input'
+      --output '$output'
+      --type $switch.conversionType
+  ]]></command>
   <inputs>
     <conditional name="switch">
        <param name="conversionType" type="select" label="conversion option">