comparison filter_by_fasta_ids.py @ 4:fa59d6fea7f5 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 988b1fc1cb8739e45648465adbf099f3fdaf87f8
author earlhaminst
date Fri, 03 Mar 2017 07:29:32 -0500
parents 78dd29aa7fc1
children 0a189243186d
comparison
equal deleted inserted replaced
3:78dd29aa7fc1 4:fa59d6fea7f5
7 7
8 Sequence = collections.namedtuple('Sequence', ['header', 'sequence']) 8 Sequence = collections.namedtuple('Sequence', ['header', 'sequence'])
9 9
10 10
11 def FASTAReader_gen(fasta_filename): 11 def FASTAReader_gen(fasta_filename):
12 fasta_file = open(fasta_filename) 12 with open(fasta_filename) as fasta_file:
13 line = fasta_file.readline()
14 while True:
15 if not line:
16 return
17 assert line.startswith('>'), "FASTA headers must start with >"
18 header = line.rstrip()
19 sequence_parts = []
20 line = fasta_file.readline() 13 line = fasta_file.readline()
21 while line and line[0] != '>': 14 while True:
22 sequence_parts.append(line.rstrip()) 15 if not line:
16 return
17 assert line.startswith('>'), "FASTA headers must start with >"
18 header = line.rstrip()
19 sequence_parts = []
23 line = fasta_file.readline() 20 line = fasta_file.readline()
24 sequence = "".join(sequence_parts) 21 while line and line[0] != '>':
25 yield Sequence(header, sequence) 22 sequence_parts.append(line.rstrip())
23 line = fasta_file.readline()
24 sequence = "".join(sequence_parts)
25 yield Sequence(header, sequence)
26 26
27 27
28 def target_match(target, search_entry): 28 def target_match(target, search_entry):
29 ''' Matches ''' 29 ''' Matches '''
30 search_entry = search_entry.upper() 30 search_entry = search_entry.upper()
45 for line in f_target.readlines(): 45 for line in f_target.readlines():
46 targets.append(">%s" % line.strip().upper()) 46 targets.append(">%s" % line.strip().upper())
47 47
48 work_summary['wanted'] = len(targets) 48 work_summary['wanted'] = len(targets)
49 49
50 # output = open(sys.argv[3], "w")
51 for entry in FASTAReader_gen(sys.argv[2]): 50 for entry in FASTAReader_gen(sys.argv[2]):
52 target_matched_results = target_match(targets, entry.header) 51 target_matched_results = target_match(targets, entry.header)
53 if target_matched_results: 52 if target_matched_results:
54 work_summary['found'] += 1 53 work_summary['found'] += 1
55 targets.remove(target_matched_results) 54 targets.remove(target_matched_results)