Mercurial > repos > iuc > data_manager_mash_sketch_builder
comparison data_manager/mash_sketch_builder.py @ 0:2af9137ba067 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mash_sketch_builder/ commit c6efcbece52dec310253537b35419839746fff7f"
author | iuc |
---|---|
date | Wed, 26 Feb 2020 17:06:21 -0500 |
parents | |
children | b6016642539d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2af9137ba067 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import errno | |
5 import json | |
6 import os | |
7 import subprocess | |
8 import uuid | |
9 | |
10 | |
# Default key under 'data_tables' in the JSON produced by mash_sketch().
DATA_TABLE_NAME = "mash_sketches"
12 | |
13 | |
def mash_sketch(mash_sketch_args, sketch_name, target_directory, data_table_name=DATA_TABLE_NAME):
    """Run ``mash sketch`` into a new UUID-named directory and describe the result.

    A random UUID is generated and a directory of that name is created under
    ``target_directory``. ``mash sketch`` is then invoked with
    ``<target_directory>/<uuid>/sketch`` as its ``-o`` output prefix and with
    ``target_directory`` as its working directory.

    Args:
        mash_sketch_args: mapping providing the keys ``kmer_size``,
            ``sketch_size``, ``probability_threshold``, ``threads``, ``fasta``
            and ``individual_sequences``. All but the last are stringified and
            passed on the command line; a truthy ``individual_sequences`` adds
            the ``-i`` flag.
        sketch_name: value stored in the ``name`` field of the returned entry.
        target_directory: directory in which the UUID subdirectory is created.
        data_table_name: key used under ``data_tables`` in the returned dict.

    Returns:
        A dict of the form ``{'data_tables': {data_table_name: [entry]}}`` where
        ``entry`` holds the UUID as both ``value`` and ``path`` and
        ``sketch_name`` as ``name``.

    Raises:
        OSError: if the UUID subdirectory cannot be created.
        subprocess.CalledProcessError: if ``mash`` exits with a non-zero status.
    """
    sketch_id = str(uuid.uuid4())
    os.mkdir(os.path.join(target_directory, sketch_id))

    # Output prefix handed to mash via -o.
    output_prefix = os.path.join(target_directory, sketch_id, "sketch")

    command = ['mash', 'sketch']
    command += ['-k', str(mash_sketch_args["kmer_size"])]
    command += ['-s', str(mash_sketch_args["sketch_size"])]
    command += ['-w', str(mash_sketch_args["probability_threshold"])]
    command += ['-o', str(output_prefix)]
    command += ['-p', str(mash_sketch_args["threads"])]
    command.append(str(mash_sketch_args["fasta"]))

    # The flag is placed after the input file, as in the established argv order.
    if mash_sketch_args["individual_sequences"]:
        command.append("-i")

    subprocess.check_call(command, cwd=target_directory)

    # The UUID serves as both the entry's identifier and its path.
    entry = {
        "value": sketch_id,
        "name": sketch_name,
        "path": sketch_id,
    }
    return {'data_tables': {data_table_name: [entry]}}
48 | |
49 | |
def main():
    """Command-line entry point: build a Mash sketch and write the data table JSON.

    Reads the JSON file named by the positional ``data_manager_json`` argument,
    takes ``output_data[0].extra_files_path`` from it as the target directory
    (creating that directory if it does not exist yet), runs ``mash_sketch``
    with the parsed options, and finally overwrites the same JSON file with
    the returned data table description.

    Raises:
        OSError: if the target directory cannot be created for any reason
            other than it already existing as a directory.
        subprocess.CalledProcessError: propagated from ``mash_sketch`` when
            the ``mash`` command fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('data_manager_json')
    parser.add_argument('--kmer-size', dest='kmer_size', type=int, default=35, help='kmer length')
    # Help text corrected: this value is passed to mash as -s (the sketch
    # size), not a minimizer length.
    parser.add_argument('--sketch-size', dest='sketch_size', type=int, default=31, help='sketch size (maximum number of min-hashes per sketch)')
    parser.add_argument('--probability-threshold', dest='probability_threshold', type=float, default=0.01, help='Probability threshold for warning about low k-mer size')
    parser.add_argument('--individual-sequences', dest='individual_sequences', action='store_true', default=False, help='Sketch individual sequences (for multi-fasta files)')
    parser.add_argument('--fasta', dest='fasta', help='Fasta file to sketch')
    # type=int validates the value up front, consistent with the other
    # numeric options, instead of passing an arbitrary string to mash.
    parser.add_argument('--threads', dest='threads', type=int, default=1, help='threads')
    parser.add_argument('--sketch-name', dest='sketch_name', help='Name for sketch')
    args = parser.parse_args()

    # Context manager so the input handle is closed deterministically.
    with open(args.data_manager_json) as json_in:
        data_manager_input = json.load(json_in)

    target_directory = data_manager_input['output_data'][0]['extra_files_path']

    try:
        os.mkdir(target_directory)
    except OSError as exc:
        # An already-existing directory is fine; anything else is a real error.
        if exc.errno != errno.EEXIST or not os.path.isdir(target_directory):
            raise

    mash_sketch_args = {
        "kmer_size": args.kmer_size,
        "sketch_size": args.sketch_size,
        "probability_threshold": args.probability_threshold,
        "fasta": args.fasta,
        "individual_sequences": args.individual_sequences,
        "threads": args.threads,
    }

    data_manager_output = mash_sketch(
        mash_sketch_args,
        args.sketch_name,
        target_directory,
    )

    # Serialise before opening for writing so that a serialisation error
    # cannot leave the JSON file truncated.
    serialized = json.dumps(data_manager_output, sort_keys=True)
    with open(args.data_manager_json, 'w') as json_out:
        json_out.write(serialized)
92 | |
93 | |
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()