Mercurial > repos > iuc > tn93_readreduce
annotate tn93_cluster.py @ 1:84849140a3bc draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
| author | iuc | 
|---|---|
| date | Fri, 23 Apr 2021 03:04:45 +0000 | 
| parents | |
| children | 1d2ec0b0a0a7 | 
| rev | line source | 
|---|---|
| 1 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 1 import argparse | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 2 import json | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 3 import os | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 4 import shlex | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 5 import shutil | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 6 import subprocess | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 7 import sys | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 8 | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 9 | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
def cluster_to_fasta(json_file, fasta_file, reference_name=None):
    """Write the centroid of every cluster in *json_file* to *fasta_file*.

    *json_file* must contain a JSON list of objects, each with a
    ``'centroid'`` string (a FASTA record whose first line is the header)
    and a ``'members'`` collection (presumably as written by
    ``tn93-cluster -m json``).

    When *reference_name* is given and is a member of a cluster, that
    cluster's centroid is written with its header line replaced by
    ``>reference_name``; all other centroids are written unchanged.

    Returns a ``(mtime, count)`` tuple: the modification time of the
    written FASTA file and the number of clusters read.
    """
    with open(json_file, "r") as source:
        clusters = json.load(source)

    with open(fasta_file, "w") as sink:
        for cluster in clusters:
            record = cluster['centroid']
            # 'members' is only consulted when a reference name was supplied.
            holds_reference = (
                reference_name is not None
                and reference_name in cluster['members']
            )
            if holds_reference:
                # Swap the header line, keep the sequence lines untouched.
                _, *sequence_lines = record.split('\n')
                record = "\n".join([">" + reference_name] + sequence_lines)
            print(record, file=sink)

    return (os.path.getmtime(fasta_file), len(clusters))
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 24 | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 25 | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
def run_command(command):
    """Run *command* and return its exit code as an int.

    *command* is a single shell-style string; it is tokenised with
    ``shlex.split`` and executed without a shell. The child's stdout and
    stderr are captured. They are only echoed (to this process's stdout
    and stderr respectively) when the child exits with a non-zero code.
    """
    proc = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    result = proc.returncode
    if result != 0:
        print('Command `%s` failed with exit code %s\n' % (command, result), file=sys.stderr)
        # Decode leniently: a failing tool may emit bytes that are not valid
        # UTF-8, and the diagnostic dump must not itself raise and hide the
        # real exit code.
        print('--------------------- STDOUT ---------------------')
        print(stdout.decode(errors='replace').replace('\\n', '\n'))
        print('------------------- END STDOUT -------------------')
        print('--------------------- STDERR ---------------------', file=sys.stderr)
        print(stderr.decode(errors='replace').replace('\\n', '\n'), file=sys.stderr)
        print('------------------- END STDERR -------------------', file=sys.stderr)
    return int(result)
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 39 | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 40 | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
def main(arguments):
    """Cluster the input MSA with tn93-cluster until few enough clusters remain.

    The input is copied into the working directory and clustered at
    ``arguments.threshold``. While the number of clusters exceeds
    ``arguments.cluster_count`` the threshold is raised by a quarter of its
    starting value and clustering is repeated, up to a threshold of 1.
    The centroid whose cluster contains the reference sequence (first FASTA
    header in ``arguments.reference``) is renamed to the reference name.

    On success the centroid FASTA is copied to ``arguments.compressed`` and
    the cluster JSON to ``arguments.output``.

    Returns 0 on success, the exit code of tn93-cluster if it fails, or 1
    when the reference has no FASTA header or no clustering could be run.
    """
    threshold = arguments.threshold
    # Each retry relaxes the threshold by a quarter of its starting value.
    step = threshold * 0.25
    workdir = os.getcwd()
    shutil.copy(arguments.input, os.path.join(workdir, 'reference_msa.fa'))
    shutil.copy(arguments.input, os.path.join(workdir, 'reference_msa.fa.bak'))

    # The reference name is the first word of the first FASTA header line.
    _ref_seq_name = None
    with open(arguments.reference) as fh:
        for line in fh:
            if line.startswith('>'):
                _ref_seq_name = line[1:].split(' ')[0].strip()
                break
    if _ref_seq_name is None:
        print('No FASTA header line found in reference file `%s`' % arguments.reference, file=sys.stderr)
        return 1

    cluster_count = None
    while threshold <= 1:
        # The command is re-tokenised with shlex.split in run_command, so the
        # free-form string arguments are quoted to stay single tokens.
        command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f reference_msa.fa' % (
            threshold,
            shlex.quote(arguments.ambigs),
            shlex.quote(arguments.cluster_type),
            arguments.overlap,
            arguments.fraction,
        )
        return_code = run_command(command)
        if return_code != 0:
            return return_code
        _, cluster_count = cluster_to_fasta('clusters.json', 'reference_msa.fa.bak', _ref_seq_name)
        # Report the threshold that produced this count, before it is changed.
        print('Found %d clusters at threshold %f' % (cluster_count, threshold))
        if cluster_count <= arguments.cluster_count or threshold == 1:
            break
        if step <= 0:
            # A zero (or negative) starting threshold cannot be relaxed;
            # retrying would repeat the same run forever.
            break
        threshold += step

    if cluster_count is None:
        # The loop never ran, so there is no clusters.json to hand back.
        print('Threshold %g is greater than 1; no clustering was performed' % threshold, file=sys.stderr)
        return 1

    shutil.copy('reference_msa.fa.bak', arguments.compressed)
    shutil.copy('clusters.json', arguments.output)
    os.remove('reference_msa.fa.bak')
    return 0
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 66 | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
changeset | 67 | 
| 
84849140a3bc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
 iuc parents: diff
if __name__ == '__main__':
    # Command-line entry point: parse the options and exit with main()'s
    # return code.
    parser = argparse.ArgumentParser(description='Cluster an MSA with tn93-cluster, relaxing the threshold until the cluster count is small enough')
    parser.add_argument('--input', help='Input MSA', required=True, type=str)
    parser.add_argument('--reference', help='Reference sequence', required=True, type=str)
    parser.add_argument('--output', help='File to write the cluster JSON to', required=True, type=str)
    parser.add_argument('--threshold', help='Threshold', required=True, type=float)
    parser.add_argument('--ambigs', help='Handle ambigs', required=True, type=str)
    parser.add_argument('--cluster-type', help='Cluster type', required=True, type=str)
    parser.add_argument('--overlap', help='Overlap', required=True, type=int)
    parser.add_argument('--fraction', help='Fraction', required=True, type=float)
    parser.add_argument('--cluster-count', help='Maximum number of clusters', required=True, type=int)
    parser.add_argument('--compressed', help='File to write compressed clusters to', required=True, type=str)
    arguments = parser.parse_args()
    # sys.exit is always available; the bare exit() helper is only injected
    # by the site module.
    sys.exit(main(arguments))
