view scripts/S01_phylip2fasta.py @ 0:d0ae18156aa2 draft default tip

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author abims-sbr
date Fri, 01 Feb 2019 10:25:52 -0500
parents
children
line wrap: on
line source

#!/usr/bin/python

## AUTHOR: Eric Fontanillas
## LAST VERSION: 20/08/14 by Julie BAFFARD

## DESCRIPTION: formatting a phylip format (as used with PAML) into fasta format

import string, os, sys
"""
if len(sys.argv) == 1:
    print "put arguments!!"
    print "USAGE: S01_phylip2fasta.py INPUT OUTPUT"
"""

## ARGUMENTS
# Both the input (phylip) and the output (fasta) paths are required. Check
# this before touching any file so a missing argument gives a usage message
# instead of an IndexError on sys.argv (and no half-opened files).
if len(sys.argv) < 3:
    sys.stderr.write("put arguments!!\n")
    sys.stderr.write("USAGE: S01_phylip2fasta.py INPUT OUTPUT\n")
    sys.exit(1)

## INPUT
f1 = sys.argv[1]
if not os.path.isfile(f1):
    # Report on stderr and exit with a non-zero status so the caller can
    # detect the failure.
    sys.stderr.write("No existing phylip file ; exiting ...\n")
    sys.exit(1)

F1 = open(f1, 'r')

## OUTPUT
f2 = sys.argv[2]
F2 = open(f2, 'w')

###### def1 ######
# Converts each "name<TAB>sequence" line of the input alignment into a fasta record (">name" line followed by the sequence line)

def format(File_IN):
    """Convert tab-separated alignment lines into fasta text.

    Reads File_IN line by line until end of file. Every non-blank line is
    expected to look like ``name<TAB>sequence``; it is emitted as a ``>name``
    line followed by a ``sequence`` line. Only the first two tab-separated
    fields of a line are used.

    Parameters:
        File_IN: open text file-like object providing ``readline()``,
            already positioned on the first sequence line.

    Returns:
        A tuple ``(fichier, c)`` where ``fichier`` is the fasta text and
        ``c`` is the number of ``readline()`` calls made, i.e. the number of
        lines read plus one for the final end-of-file read (kept unchanged
        for backward compatibility).

    Raises:
        IndexError: if a non-blank line contains no tab separator.
    """
    c = 0
    records = []    # joined once at the end instead of repeated string concatenation
    while True:
        c = c + 1
        line = File_IN.readline()
        if not line:
            break

        # Remove only the line terminator ("\n" or "\r\n"). Blindly dropping
        # the last character would truncate the sequence when the final line
        # has no trailing newline.
        line = line.rstrip("\r\n")
        if not line.strip():
            continue    # tolerate blank lines (e.g. at the end of the file)

        fields = line.split("\t")    # list : [name, sequence] --- "\t" instead of " "
        if len(fields) < 2:
            raise IndexError(
                "line %d: expected 'name<TAB>sequence', got %r" % (c, line)
            )
        fasta_name = fields[0]    # get sequence name
        fasta_seq = fields[1]     # get sequence
        records.append(">" + fasta_name + "\n" + fasta_seq + "\n")

    return ("".join(records), c)
#-#-#-#-#-#-#-#-#-#-#

###################
### RUN RUN RUN ###
###################

# Convert the input and write the result. The try/finally guarantees both
# files are closed even if the conversion or the write fails.
try:
    F1.readline() ## jump the first line (presumably the phylip header line -- confirm)

    fichier_txt, c = format(F1)   ### DEF1 ###

    F2.write(fichier_txt)
finally:
    F1.close()
    F2.close()