comparison split_fasta.py @ 5:733ca84b21ee draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/splitfasta commit 31945d5d8c5ebee64ebf29c6ea022fb831f47274"
author rnateam
date Mon, 21 Sep 2020 15:40:14 +0000
parents
children
comparison
equal deleted inserted replaced
4:ae4d5733272f 5:733ca84b21ee
1 #!/usr/bin/env python
2
3 import os
4 import sys
5 from Bio import SeqIO
6
# Split a FASTA file into multiple files under ./splits.
#
# Command line: split_fasta.py <input_filename> [<num_chunks>]
#   * num_chunks omitted or 0: write one output file per record, named after
#     the record id.
#   * num_chunks > 0: write at most num_chunks files named part1, part2, ...
USAGE = "Usage: split_fasta.py <input_filename> [<num_chunks>]"

num_chunks = 0
if len(sys.argv) == 3:
    input_filename = sys.argv[1]
    try:
        num_chunks = int(sys.argv[2])
    except ValueError:
        # A non-numeric chunk count is a usage error, not a crash.
        sys.exit(USAGE)
    if num_chunks < 0:
        # A negative count would make the `count < num_chunks` test below
        # always false and silently dump every record into part1.
        sys.exit("num_chunks must be a non-negative integer")
elif len(sys.argv) == 2:
    input_filename = sys.argv[1]
else:
    sys.exit(USAGE)

# os.mkdir raises if 'splits' already exists, so the script aborts rather
# than writing into an existing directory.
os.mkdir('splits')

records_per_chunk = 0  # only meaningful in chunk mode
if num_chunks != 0:
    # Chunk mode needs the total number of records up front, so make a first
    # pass over the input counting header lines.
    with open(input_filename) as input_file:
        record_count = sum(
            1 for line in input_file if line.lstrip().startswith('>'))

    # round() can round down; any remainder ends up in the final chunk
    # because of the `count < num_chunks` test in the loop below.
    records_per_chunk = round(float(record_count) / num_chunks)

count = 1  # 1-based index of the next partN file to be written
with open(input_filename) as input_file:
    records = []  # records accumulated for the output file being built
    for record in SeqIO.parse(input_file, 'fasta'):
        records.append(record)
        if num_chunks == 0:
            # NOTE(review): record.id is used verbatim as the file name, so
            # duplicate ids overwrite each other and ids containing a path
            # separator will not land directly in 'splits' - confirm inputs
            # are constrained upstream.
            output_filename = os.path.join('splits', record.id)
        elif count < num_chunks and len(records) >= records_per_chunk:
            output_filename = os.path.join('splits', 'part{}'.format(count))
        else:
            # Current chunk is not full yet, or this is the final chunk,
            # which is flushed after the loop.
            continue
        SeqIO.write(records, output_filename, 'fasta')
        count += 1
        records = []

    if records:
        # Only reachable in chunk mode: flush whatever is left as the
        # final part.
        output_filename = os.path.join('splits', 'part{}'.format(count))
        SeqIO.write(records, output_filename, 'fasta')