changeset 6:f546e7278f04 draft default tip

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fasta_merge_files_and_filter_unique_sequences commit 0ce1979ec9cf851f85ad74c78a3cc88826a2f070"
author galaxyp
date Mon, 23 Nov 2020 19:35:09 +0000
parents 650d553c1fda
children
files fasta_merge_files_and_filter_unique_sequences.py test-data/res-accession.fa test-data/res-sequence.fa
diffstat 3 files changed, 13 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/fasta_merge_files_and_filter_unique_sequences.py	Mon Jul 24 18:24:52 2017 -0400
+++ b/fasta_merge_files_and_filter_unique_sequences.py	Mon Nov 23 19:35:09 2020 +0000
@@ -1,19 +1,23 @@
 #!/usr/bin/env python
 import os
+import re
 import sys
-import re
+
 
 class Sequence:
     ''' Holds protein sequence information '''
+
     def __init__(self):
         self.header = ""
         self.accession = ""
         self.sequence = ""
 
+
 class FASTAReader:
     """
         FASTA db iterator. Returns a single FASTA sequence object.
     """
+
     def __init__(self, fasta_name, accession_parser):
         self.fasta_file = open(fasta_name)
         self.accession_parser = accession_parser
@@ -31,11 +35,11 @@
                 break
 
         seq = Sequence()
-        seq.header = line.rstrip().replace('\n','').replace('\r','')
+        seq.header = line.rstrip().replace('\n', '').replace('\r', '')
 
         m = re.search(self.accession_parser, seq.header)
         if not m or len(m.groups()) < 1 or len(m.group(1)) == 0:
-          sys.exit("Could not parse accession from '%s'" % seq.header)
+            sys.exit("Could not parse accession from '%s'" % seq.header)
         seq.accession = m.group(1)
 
         while True:
@@ -46,7 +50,7 @@
             if line[0] == '>':
                 self.fasta_file.seek(tail)
                 break
-            seq.sequence = seq.sequence + line.rstrip().replace('\n','').replace('\r','')
+            seq.sequence = seq.sequence + line.rstrip().replace('\n', '').replace('\r', '')
         return seq
 
     # Python 2/3 compat
@@ -66,8 +70,8 @@
         sys.exit("2nd argument must be 'sequence' or 'accession'")
 
     accession_parser = sys.argv[3]
-    for key, value in { '\'' :'__sq__', '\\' : '__backslash__' }.items():
-      accession_parser = accession_parser.replace(value, key)
+    for key, value in {'\'': '__sq__', '\\': '__backslash__'}.items():
+        accession_parser = accession_parser.replace(value, key)
 
     for fasta_file in sys.argv[4:]:
         print("Reading entries from '%s'" % fasta_file)
@@ -95,5 +99,6 @@
             out_file.write(os.linesep)
     out_file.close()
 
+
 if __name__ == "__main__":
     main()
--- a/test-data/res-accession.fa	Mon Jul 24 18:24:52 2017 -0400
+++ b/test-data/res-accession.fa	Mon Nov 23 19:35:09 2020 +0000
@@ -9,4 +9,4 @@
 >two_2
 GGTGTGTACGT
 >three_2|123
-ACGTACGACTTTGGTTGTGT
\ No newline at end of file
+ACGTACGACTTTGGTTGTGT
--- a/test-data/res-sequence.fa	Mon Jul 24 18:24:52 2017 -0400
+++ b/test-data/res-sequence.fa	Mon Nov 23 19:35:09 2020 +0000
@@ -5,4 +5,4 @@
 >three
 ACGTACG
 >three_2|123
-ACGTACGACTTTGGTTGTGT
\ No newline at end of file
+ACGTACGACTTTGGTTGTGT