diff scripts/S01_phylip2fasta.py @ 0:d0ae18156aa2 draft default tip

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author abims-sbr
date Fri, 01 Feb 2019 10:25:52 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/S01_phylip2fasta.py	Fri Feb 01 10:25:52 2019 -0500
@@ -0,0 +1,59 @@
+#!/usr/bin/python
+
+## AUTHOR: Eric Fontanillas
+## LAST VERSION: 20/08/14 by Julie BAFFARD
+
+## DESCRIPTION: converts a tab-separated phylip alignment (as used with PAML) into fasta format
+
+import string, os, sys
+"""
+if len(sys.argv) == 1:
+    print "put arguments!!"
+    print "USAGE: S01_phylip2fasta.py INPUT OUTPUT"
+"""
+
+## INPUT
+# Both the input and the output path are required; fail with a usage message
+# instead of an IndexError when one of them is missing.
+if len(sys.argv) < 3:
+    sys.stderr.write("USAGE: S01_phylip2fasta.py INPUT OUTPUT\n")
+    sys.exit(1)
+
+if os.path.isfile(sys.argv[1]) :
+    f1 = sys.argv[1]
+else:
+    # Report on stderr and leave with a non-zero status so that the calling
+    # tool can tell that nothing was converted.
+    sys.stderr.write("No existing phylip file ; exiting ...\n")
+    sys.exit(1)
+
+# Input handle: read by the conversion step further down.
+F1 = open(f1, 'r')
+
+## OUTPUT
+f2 = sys.argv[2]
+# Output handle: opened only once the input is known to exist, so that a bad
+# input path does not leave an empty output file behind.
+F2 = open(f2, 'w')
+
+###### def1 ######
+# Turns each tab-separated "name<TAB>sequence" line of the alignment into a fasta record (">name" line followed by the sequence line)
+
+def format(File_IN):
+    """Convert tab-separated "name<TAB>sequence" lines into fasta text.
+
+    File_IN is an open, readable text file object positioned on the first
+    sequence line (the caller is expected to have consumed any header line).
+
+    Returns a tuple (fichier, c):
+      fichier -- the fasta text, one ">name" line followed by one sequence
+                 line per input record;
+      c       -- the number of readline() calls performed, i.e. the number
+                 of lines read plus one for the final end-of-file read.
+
+    Blank lines are skipped. A non-blank line without a tab raises
+    ValueError.
+    """
+    c = 0
+    records = []    # collected and joined once at the end (no repeated string copies)
+    while True:
+        c = c + 1
+        line = File_IN.readline()
+        if not line:
+            break
+
+        # Strip only the line terminator: the last line may have none, and
+        # files written on Windows end in "\r\n".
+        line = line.rstrip("\r\n")
+        if not line.strip():
+            continue
+
+        fields = line.split("\t")    # list : [name, sequence, ...]
+        if len(fields) < 2:
+            raise ValueError(
+                "record line %d: expected 'name<TAB>sequence', got %r" % (c, line)
+            )
+        records.append(">" + fields[0] + "\n" + fields[1] + "\n")
+
+    return ("".join(records), c)
+#-#-#-#-#-#-#-#-#-#-#
+
+###################
+### RUN RUN RUN ###
+###################
+
+try:
+    # Skip the first line of the input: it is not a sequence record
+    # (presumably the phylip header line -- confirm against the input files).
+    F1.readline()
+
+    fichier_txt, c = format(F1)   ### DEF1 ###
+
+    F2.write(fichier_txt)
+finally:
+    # Release both handles even when the conversion or the write fails.
+    F1.close()
+    F2.close()