Mercurial > repos > rplanel > sequence_splitter
annotate sequence-splitter.py @ 0:3e33310a7082 draft
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
| author | rplanel | 
|---|---|
| date | Thu, 08 Aug 2019 11:18:30 -0400 | 
| parents | |
| children | 7b509a1801e4 | 
| rev | line source | 
|---|---|
| 
0
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python3 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
2 # coding: utf-8 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
3 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
4 import argparse | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
5 import logging | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
6 import os | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
7 from itertools import chain, islice, tee | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
8 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
9 # BioPython | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
10 from Bio import SeqIO | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
11 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
# Configure the root logger once at import time so every message emitted by
# this script is timestamped and tagged with its level.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s : %(levelname)s : %(message)s",
)
# Module-wide logger (the root logger) shared by all functions below.
logger = logging.getLogger()
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
16 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
17 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
def main():
    """Split the input sequence file into numbered chunk files.

    Reads the command-line options, builds a stream of chunks either from a
    fixed chunk size (``--chunk-size``) or from a requested number of chunks
    (``--nb-chunk``), then writes every chunk into the output directory.
    """
    args = parse_arguments()
    # Separate "<dir>/<name>" from "<.ext>", then keep only "<name>"; both
    # pieces are reused to build the chunk file names.
    basename, file_extension = os.path.splitext(args.sequences)
    _, filename = os.path.split(basename)
    chunk_size = args.chunk_size
    # Test against None rather than truthiness: "--chunk-size 0" must not be
    # mistaken for "option not given" and routed to the --nb-chunk branch,
    # where nb_chunk would be None.
    if chunk_size is not None:
        logger.info("%s = %s", "chunk size parameter", chunk_size)
        sequences_record = gen_sequence_record(args.sequences, args.format)
        chunks = gen_get_chunks_by_size(sequences_record, chunk_size)
    else:
        logger.info("%s = %s", "number of chunks parameter", args.nb_chunk)
        chunks = gen_get_chunks(args.sequences, args.format, args.nb_chunk)

    # Write the chunks in numbered files.
    write_chunks(chunks, args.output, filename, file_extension, args.format)
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
36 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
37 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
def gen_get_chunks(sequences_path, sequences_format, nb_chunk):
    """Split a sequence file into at most ``nb_chunk`` chunks.

    The file is parsed twice: a first pass only counts the records, a second
    pass produces the records that are actually grouped into chunks.

    Arguments:
        sequences_path {str} -- location of the sequence file, handed to
            gen_sequence_record
        sequences_format {str} -- format name handed to gen_sequence_record
        nb_chunk {int} -- maximum number of chunks wanted (>= 1)

    Returns:
        generator -- the chunks produced by gen_get_chunks_by_size

    Raises:
        ValueError -- if nb_chunk is lower than 1
    """
    if nb_chunk < 1:
        raise ValueError(
            "nb_chunk must be a positive integer, got %r" % (nb_chunk,)
        )

    # First pass: count the sequences.
    nb_sequences = get_nb_sequences(
        gen_sequence_record(sequences_path, sequences_format)
    )
    logger.info("%s = %i", "Total number of sequences", nb_sequences)

    # Second pass: the records that will be split.
    sequences_to_split = gen_sequence_record(sequences_path, sequences_format)

    # Ceiling division so that no more than nb_chunk chunks are produced
    # (flooring 10 sequences / 3 chunks gives size 3, i.e. 4 chunks).
    # A chunk always holds at least one sequence, hence the max().
    chunk_size = max(1, -(-nb_sequences // nb_chunk))
    return gen_get_chunks_by_size(sequences_to_split, chunk_size)
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
61 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
62 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
def gen_get_chunks_by_size(iterable, size=10):
    """Lazily yield successive chunks of at most ``size`` items.

    Every chunk is itself a lazy iterator reading from the same underlying
    iterator, so a chunk must be fully consumed before the next one is
    requested; otherwise the chunk boundaries shift.

    Arguments:
        iterable {iterable} -- items to group
        size {int} -- maximum number of items per chunk (default: {10})

    Yields:
        iterator -- the items of one chunk; only the last chunk may hold
            fewer than ``size`` items

    Raises:
        ValueError -- if size is lower than 1 (raised when iteration starts)
    """
    if size < 1:
        raise ValueError(
            "chunk size must be a positive integer, got %r" % (size,)
        )
    # Adjacent literals instead of a backslash continuation inside the
    # string, which would embed the source indentation in the message.
    logger.info(
        "%s = %i",
        "chunk size got (could be different from parameter if more chunk "
        "asked than sequences in multifasta)",
        size,
    )
    iterator = iter(iterable)
    # Pulling the first item through the for loop is what detects exhaustion;
    # the remaining size - 1 items are taken lazily from the same iterator.
    for first in iterator:
        yield chain([first], islice(iterator, size - 1))
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
73 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
74 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
def gen_sequence_record(sequences_path, sequence_format):
    """Return the record iterator built by ``SeqIO.parse`` for a file.

    Arguments:
        sequences_path {str} -- location of the sequence file
        sequence_format {str} -- format name understood by SeqIO.parse

    Returns:
        iterator -- whatever SeqIO.parse returns for these arguments
    """
    return SeqIO.parse(sequences_path, sequence_format)
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
77 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
78 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
def get_nb_sequences(sequences):
    """Count the items of ``sequences`` without storing them.

    The iterable is consumed by the count, so a one-shot iterator cannot be
    reused afterwards.

    Arguments:
        sequences {iterable} -- items to count

    Returns:
        int -- number of items seen
    """
    return sum(1 for _ in sequences)
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
81 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
82 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
def write_chunks(iterable, dirname, filename, file_extension, sequence_format):
    """Write every chunk of ``iterable`` into its own numbered file.

    Files are named ``<filename>-chunk-<N><file_extension>`` inside
    ``dirname``, N starting at 1.

    Arguments:
        iterable {iterable} -- chunks; each chunk is an iterable of records
            handed to SeqIO.write
        dirname {str} -- output directory, created when it does not exist
        filename {str} -- prefix of the chunk file names
        file_extension {str} -- suffix of the chunk file names
        sequence_format {str} -- format name handed to SeqIO.write
    """
    # Create the output directory once, up front, instead of testing for it
    # on every chunk; makedirs also handles nested paths and an already
    # existing directory.
    os.makedirs(dirname, exist_ok=True)
    for idx, chunk in enumerate(iterable, start=1):
        output_file = os.path.join(
            dirname, filename + "-chunk-" + str(idx) + file_extension
        )
        with open(output_file, mode="w") as output_handle:
            # SeqIO.write returns the number of records written, so the
            # chunk does not have to be duplicated in memory to be counted.
            nb_written = SeqIO.write(chunk, output_handle, sequence_format)
        logger.info("%s : number of sequences = %i", output_file, nb_written)
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
97 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
98 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
def parse_arguments():
    """Parse the command-line options of the sequence splitter.

    ``--sequences`` and ``--format`` are mandatory, and exactly one of
    ``--chunk-size`` / ``--nb-chunk`` must be given.

    Returns:
        argparse.Namespace -- with the attributes ``sequences``, ``format``,
            ``chunk_size``, ``nb_chunk`` and ``output``
    """

    def positive_int(value):
        """argparse type: accept only integers greater than or equal to 1."""
        number = int(value)
        if number < 1:
            raise argparse.ArgumentTypeError(
                "must be a positive integer, got %r" % (value,)
            )
        return number

    parser = argparse.ArgumentParser(description="Split fasta/fastq files")
    # Both options are needed to read the input, so a missing one is reported
    # by argparse instead of surfacing later as an error on None.
    parser.add_argument(
        "-s",
        "--sequences",
        type=str,
        required=True,
        help="File that contains the sequences",
    )
    parser.add_argument(
        "-f",
        "--format",
        type=str,
        required=True,
        help="File format (fastq, fasta)",
    )

    # The two splitting strategies exclude each other.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "-c",
        "--chunk-size",
        type=positive_int,
        help="The number of sequences by chunks.",
    )
    group.add_argument(
        "-n",
        "--nb-chunk",
        type=positive_int,
        help="Number of chunks",
    )

    parser.add_argument(
        "-o",
        "--output",
        type=str,
        default="./",
        help="The output directory where the chunks will be saved",
    )

    return parser.parse_args()
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
124 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
125 | 
| 
 
3e33310a7082
planemo upload for repository https://github.com/rplanel/galaxy-tools/tree/master/tools/sequence-splitter commit cba12d215a89e9685c7d1f55d067770d7ec0dea2
 
rplanel 
parents:  
diff
changeset
 | 
# Run the splitter only when the file is executed as a script, bracketing
# the run with START / FINISHED log lines.
if __name__ == "__main__":
    logger.info("START")
    main()
    logger.info("FINISHED")
