Mercurial > repos > devteam > fasta_concatenate_by_species
diff fasta_concatenate_by_species.py @ 2:16df616b39e5 draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author | devteam |
---|---|
date | Sun, 01 Mar 2020 07:24:26 -0500 |
parents | 2126e1b833a2 |
children |
line wrap: on
line diff
--- a/fasta_concatenate_by_species.py Mon Nov 17 10:15:05 2014 -0500
+++ b/fasta_concatenate_by_species.py Sun Mar 01 07:24:26 2020 -0500
@@ -1,39 +1,43 @@
 #!/usr/bin/env python
-#Dan Blankenberg
+# Dan Blankenberg
 """
-Takes a Multiple Alignment FASTA file and concatenates 
-sequences for each species, resulting in one sequence 
+Takes a Multiple Alignment FASTA file and concatenates
+sequences for each species, resulting in one sequence
 alignment per species.
 """
 
-import sys, tempfile
+import sys
+import tempfile
+from collections import OrderedDict
+
 from utils.maf_utilities import iter_fasta_alignment
-from utils.odict import odict
+
 
 def __main__():
     input_filename = sys.argv[1]
     output_filename = sys.argv[2]
-    species = odict()
+    species = OrderedDict()
     cur_size = 0
-    for components in iter_fasta_alignment( input_filename ):
-        species_not_written = species.keys()
+    for components in iter_fasta_alignment(input_filename):
+        species_not_written = list(species.keys())
         for component in components:
             if component.species not in species:
-                species[component.species] = tempfile.TemporaryFile()
-                species[component.species].write( "-" * cur_size )
-            species[component.species].write( component.text )
+                species[component.species] = tempfile.TemporaryFile(mode="r+")
+                species[component.species].write("-" * cur_size)
+            species[component.species].write(component.text)
             try:
-                species_not_written.remove( component.species )
+                species_not_written.remove(component.species)
             except ValueError:
-                #this is a new species
+                # this is a new species
                 pass
         for spec in species_not_written:
-            species[spec].write( "-" * len( components[0].text ) )
-        cur_size += len( components[0].text )
-    out = open( output_filename, 'wb' )
-    for spec, f in species.iteritems():
-        f.seek( 0 )
-        out.write( ">%s\n%s\n" % ( spec, f.read() ) )
-    out.close()
+            species[spec].write("-" * len(components[0].text))
+        cur_size += len(components[0].text)
+    with open(output_filename, 'w') as out:
+        for spec, f in species.items():
+            f.seek(0)
+            out.write(">%s\n%s\n" % (spec, f.read()))
 
-if __name__ == "__main__" : __main__()
+
+if __name__ == "__main__":
+    __main__()