Mercurial > repos > iuc > tn93_readreduce
comparison tn93_cluster.py @ 2:1d2ec0b0a0a7 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit 98c0d716cbd1237ae735ce83e0153ee246abd5d8"
| author | iuc |
|---|---|
| date | Wed, 20 Apr 2022 16:59:27 +0000 |
| parents | 84849140a3bc |
| children | c176164dc8a5 |
comparison
Legend: equal | deleted | inserted | replaced
| 1:84849140a3bc | 2:1d2ec0b0a0a7 |
|---|---|
| 1 import argparse | 1 import argparse |
| 2 import json | 2 import json |
| 3 import os | 3 import os |
| 4 import shlex | 4 import shlex |
| 5 import shutil | |
| 6 import subprocess | 5 import subprocess |
| 7 import sys | 6 import sys |
| 8 | 7 |
| 9 | 8 |
| 10 def cluster_to_fasta(json_file, fasta_file, reference_name=None): | 9 def cluster_to_fasta(json_file, fasta_file, reference_name=None): |
| 39 | 38 |
| 40 | 39 |
| 41 def main(arguments): | 40 def main(arguments): |
| 42 threshold = arguments.threshold | 41 threshold = arguments.threshold |
| 43 step = threshold * 0.25 | 42 step = threshold * 0.25 |
| 44 shutil.copy(arguments.input, os.path.join(os.getcwd(), 'reference_msa.fa')) | |
| 45 shutil.copy(arguments.input, os.path.join(os.getcwd(), 'reference_msa.fa.bak')) | |
| 46 with open(arguments.reference) as fh: | 43 with open(arguments.reference) as fh: |
| 47 for line in fh: | 44 for line in fh: |
| 48 if line[0] == '>': | 45 if line[0] == '>': |
| 49 _ref_seq_name = line[1:].split(' ')[0].strip() | 46 _ref_seq_name = line[1:].split(' ')[0].strip() |
| 50 break | 47 break |
| 51 while True and threshold <= 1: | 48 while threshold <= 1: |
| 52 command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f reference_msa.fa' % (threshold, arguments.ambigs, arguments.cluster_type, arguments.overlap, arguments.fraction) | 49 command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s' % (threshold, arguments.ambigs, arguments.cluster_type, arguments.overlap, arguments.fraction, arguments.input) |
| 53 return_code = run_command(command) | 50 return_code = run_command(command) |
| 54 if return_code != 0: | 51 if return_code != 0: |
| 55 return return_code | 52 return return_code |
| 56 input_stamp, cluster_count = cluster_to_fasta('clusters.json', 'reference_msa.fa.bak', _ref_seq_name) | 53 input_stamp, cluster_count = cluster_to_fasta('clusters.json', 'clusters.fa', _ref_seq_name) |
| 57 if cluster_count <= arguments.cluster_count or threshold == 1: | 54 if cluster_count <= arguments.cluster_count: |
| 58 break | 55 break |
| 59 else: | 56 else: |
| 60 threshold += step | 57 threshold += step |
| 61 print('Found %d clusters at threshold %f' % (cluster_count, threshold)) | 58 print('Found %d clusters at threshold %f' % (cluster_count, threshold)) |
| 62 shutil.copy('reference_msa.fa.bak', arguments.compressed) | |
| 63 shutil.copy('clusters.json', arguments.output) | |
| 64 os.remove('reference_msa.fa.bak') | |
| 65 return 0 | 59 return 0 |
| 66 | 60 |
| 67 | 61 |
| 68 if __name__ == '__main__': | 62 if __name__ == '__main__': |
| 69 parser = argparse.ArgumentParser(description='Combine alignments into a single file, adding a reference sequence as well') | 63 parser = argparse.ArgumentParser(description='Combine alignments into a single file, adding a reference sequence as well') |
