Previous changeset 5:650d553c1fda (2017-07-24) |
Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fasta_merge_files_and_filter_unique_sequences commit 0ce1979ec9cf851f85ad74c78a3cc88826a2f070" |
modified:
fasta_merge_files_and_filter_unique_sequences.py test-data/res-accession.fa test-data/res-sequence.fa |
b |
diff -r 650d553c1fda -r f546e7278f04 fasta_merge_files_and_filter_unique_sequences.py
--- a/fasta_merge_files_and_filter_unique_sequences.py Mon Jul 24 18:24:52 2017 -0400
+++ b/fasta_merge_files_and_filter_unique_sequences.py Mon Nov 23 19:35:09 2020 +0000 |
[ |
@@ -1,19 +1,23 @@ #!/usr/bin/env python import os +import re import sys -import re + class Sequence: ''' Holds protein sequence information ''' + def __init__(self): self.header = "" self.accession = "" self.sequence = "" + class FASTAReader: """ FASTA db iterator. Returns a single FASTA sequence object. """ + def __init__(self, fasta_name, accession_parser): self.fasta_file = open(fasta_name) self.accession_parser = accession_parser @@ -31,11 +35,11 @@ break seq = Sequence() - seq.header = line.rstrip().replace('\n','').replace('\r','') + seq.header = line.rstrip().replace('\n', '').replace('\r', '') m = re.search(self.accession_parser, seq.header) if not m or len(m.groups()) < 1 or len(m.group(1)) == 0: - sys.exit("Could not parse accession from '%s'" % seq.header) + sys.exit("Could not parse accession from '%s'" % seq.header) seq.accession = m.group(1) while True: @@ -46,7 +50,7 @@ if line[0] == '>': self.fasta_file.seek(tail) break - seq.sequence = seq.sequence + line.rstrip().replace('\n','').replace('\r','') + seq.sequence = seq.sequence + line.rstrip().replace('\n', '').replace('\r', '') return seq # Python 2/3 compat @@ -66,8 +70,8 @@ sys.exit("2nd argument must be 'sequence' or 'accession'") accession_parser = sys.argv[3] - for key, value in { '\'' :'__sq__', '\\' : '__backslash__' }.items(): - accession_parser = accession_parser.replace(value, key) + for key, value in {'\'': '__sq__', '\\': '__backslash__'}.items(): + accession_parser = accession_parser.replace(value, key) for fasta_file in sys.argv[4:]: print("Reading entries from '%s'" % fasta_file) @@ -95,5 +99,6 @@ out_file.write(os.linesep) out_file.close() + if __name__ == "__main__": main() |
b |
diff -r 650d553c1fda -r f546e7278f04 test-data/res-accession.fa
--- a/test-data/res-accession.fa Mon Jul 24 18:24:52 2017 -0400
+++ b/test-data/res-accession.fa Mon Nov 23 19:35:09 2020 +0000 |
b |
@@ -9,4 +9,4 @@
 >two_2
 GGTGTGTACGT
 >three_2|123
-ACGTACGACTTTGGTTGTGT
\ No newline at end of file
+ACGTACGACTTTGGTTGTGT |
b |
diff -r 650d553c1fda -r f546e7278f04 test-data/res-sequence.fa
--- a/test-data/res-sequence.fa Mon Jul 24 18:24:52 2017 -0400
+++ b/test-data/res-sequence.fa Mon Nov 23 19:35:09 2020 +0000 |
b |
@@ -5,4 +5,4 @@
 >three
 ACGTACG
 >three_2|123
-ACGTACGACTTTGGTTGTGT
\ No newline at end of file
+ACGTACGACTTTGGTTGTGT |