Mercurial > repos > bjoern-gruening > antismash
comparison antiSMASH_wrapper.py @ 0:6a37d0a4510a default tip
initial uploaded
| author | bjoern-gruening |
|---|---|
| date | Thu, 15 Mar 2012 05:23:03 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:6a37d0a4510a |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # -*- coding: UTF-8 -*- | |
| 3 | |
| 4 import os, sys, subprocess, commands | |
| 5 import random, shutil | |
| 6 import zipfile | |
| 7 | |
| 8 | |
# Directory passed to antismash.py through its --blastdbpath option.
blastdbpath = '/home/galaxy/bin/antismash-1.1.0/db'
# Directory passed to antismash.py through its --pfamdbpath option.
pfamdbpath = '/home/galaxy/bin/antismash-1.1.0/db'
# Path of the antismash.py script that this wrapper executes.
antismash_path = '/home/galaxy/bin/antismash-1.1.0/antismash.py'
| 12 | |
| 13 | |
def zipper(dir, zip_file):
    """Archive the result files found below ``dir`` into ``zip_file``.

    The tree under ``dir`` is walked and only files whose containing
    directory path includes the substring ``dataset_`` are added.  Member
    names inside the archive are the file paths relative to ``dir``.

    :param dir: root directory to walk.
    :param zip_file: path of the zip archive to create; an existing file
        is overwritten.
    :returns: ``zip_file``, unchanged.
    """
    root_len = len(os.path.abspath(dir))
    # The context manager guarantees the archive is closed (and its central
    # directory written) even when one of the writes raises.
    with zipfile.ZipFile(zip_file, 'w',
                         compression=zipfile.ZIP_DEFLATED) as archive:
        for root, _dirs, files in os.walk(dir):
            # only include the result directory
            # assumption: each galaxy file, and so the result directory,
            # starts with dataset_xxx
            if 'dataset_' not in root:
                continue
            # Path of the current directory relative to ``dir``.
            archive_root = os.path.abspath(root)[root_len:].lstrip(os.sep)
            for f in files:
                fullpath = os.path.join(root, f)
                archive_name = os.path.join(archive_root, f)
                archive.write(fullpath, archive_name, zipfile.ZIP_DEFLATED)
    return zip_file
| 28 | |
| 29 | |
def _yes_no_option(flag, enabled):
    """Return ``[flag, 'y']`` if ``enabled`` is truthy, else ``[flag, 'n']``."""
    return [flag, 'y' if enabled else 'n']


def _build_antismash_command(args, input_path):
    """Assemble the antismash.py argument vector for ``input_path``."""
    command = [
        antismash_path, input_path,
        '--geneclustertypes', '%s' % args.geneclustertypes,
        '--taxon', 'e' if args.eukaryotic else 'p',
    ]
    command += _yes_no_option('--clusterblast', args.clusterblast)
    command += _yes_no_option('--smcogs', args.smcogs)
    command += _yes_no_option('--fullhmm', args.fullhmm)
    command += _yes_no_option('--fullblast', args.fullblast)
    command += [
        '--glimmer_prediction', '%s' % args.glimmer_prediction,
        '--blastdbpath', blastdbpath,
        '--pfamdbpath', pfamdbpath,
        '--cores', '10',
    ]
    return command


def _read_clustername_mapping(result_path):
    """Map the 3rd to the 4th tab-separated field of clusterblast/geneclusters.txt."""
    clustername_mapping = {}
    path = os.path.join(result_path, 'clusterblast/geneclusters.txt')
    with open(path) as handle:
        for line in handle:
            # Strip the line ending so a 4-column line does not carry a
            # newline into the mapped value.
            token = line.rstrip('\r\n').split('\t')
            if len(token) < 4:
                # blank or truncated line: nothing to map
                continue
            clustername_mapping[token[2]] = token[3]
    return clustername_mapping


def _write_geneclusterprots(result_path, clustername_mapping, out):
    """Copy clusterblast/geneclusterprots.fasta to ``out``, rewriting headers.

    In every header line, ``|key|`` is replaced by ``|key|value|`` for each
    mapping key found in that line.  All other lines are copied unchanged.
    """
    path = os.path.join(result_path, 'clusterblast/geneclusterprots.fasta')
    with open(path) as handle:
        for line in handle:
            if not line.startswith('>'):
                out.write(line)
                continue
            matched = False
            for k, v in clustername_mapping.items():
                marker = '|%s|' % k
                if marker in line:
                    out.write(line.replace(marker, '|%s|%s|' % (k, v)))
                    matched = True
            if not matched:
                # Keep the header anyway so the following sequence lines
                # are never left without one.
                out.write(line)


def _copy_html_report(result_path, args):
    """Copy display.xhtml and its directories/assets to the HTML outputs."""
    shutil.copy(os.path.join(result_path, 'display.xhtml'), args.html_file)
    os.mkdir(args.html_path)
    for subdir in ('html', 'images', 'svg', 'substrspecs'):
        shutil.copytree(os.path.join(result_path, subdir),
                        os.path.join(args.html_path, subdir))
    for asset in ('jquery.svg.js', 'jquery.svgdom.js',
                  'jquery-1.4.2.min.js', 'style.css'):
        shutil.copy(os.path.join(result_path, asset), args.html_path)


def anitSMASH(args):
    """Run antismash.py on ``args.input`` and collect its results.

    The input is copied into a fresh scratch directory under /tmp,
    antismash.py is executed there, and the results are copied to the
    output locations named in ``args``: ``embl_path``, ``geneclusterprots``
    (an open, writable file object), ``zip``, ``html_file`` and
    ``html_path``.  The scratch directory is removed afterwards, also when
    a step fails.

    Example call of the wrapped tool:
    ./antismash.py Tue6071_genome.fasta --geneclustertypes 1 --fullhmm y

    :param args: namespace providing ``input``, ``geneclustertypes``,
        ``glimmer_prediction``, the boolean switches ``eukaryotic``,
        ``clusterblast``, ``smcogs``, ``fullhmm``, ``fullblast`` and the
        output attributes listed above.
    :raises IOError/OSError: if an expected result file is missing or an
        output location cannot be written.
    """
    import tempfile

    previous_cwd = os.getcwd()
    # mkdtemp creates a uniquely named directory atomically, avoiding the
    # collisions a random-number based name can run into.
    tmp_dir = tempfile.mkdtemp(prefix='galaxy_', dir='/tmp')
    try:
        os.mkdir(os.path.join(tmp_dir, 'geneprediction'))
        os.chdir(tmp_dir)
        input_name = os.path.basename(args.input) + '.fasta'
        new_input_path = os.path.join(tmp_dir, input_name)

        # try to generate the same name as in antismash.py
        genomename = ".".join(input_name.split(".")[:-1])
        for i in """!"#$%&()*+,./:;=>?@[]^`{|}'""":
            genomename = genomename.replace(i, "")
        result_path = os.path.join(tmp_dir, genomename)

        shutil.copy(args.input, new_input_path)

        # Argument list without a shell: paths containing spaces or shell
        # metacharacters are passed through verbatim.
        # NOTE(review): the exit status is not checked; confirm how
        # antismash.py reports failure before switching to check_call.
        subprocess.call(_build_antismash_command(args, new_input_path))

        shutil.copy(os.path.join(result_path, '%s.final.embl' % genomename),
                    args.embl_path)

        clustername_mapping = _read_clustername_mapping(result_path)
        _write_geneclusterprots(result_path, clustername_mapping,
                                args.geneclusterprots)

        zipper(result_path, args.zip)

        # html output
        _copy_html_report(result_path, args)
    finally:
        # Leave the scratch directory before deleting it, and delete it
        # even if restoring the working directory fails.
        try:
            os.chdir(previous_cwd)
        finally:
            # remove tmp directory
            shutil.rmtree(tmp_dir)
| 123 | |
| 124 | |
def arg_parse(argv=None):
    """Parse the command line of the antiSMASH wrapper.

    :param argv: optional list of argument strings to parse instead of
        the process command line; ``None`` (the default) makes argparse
        read ``sys.argv[1:]``.
    :returns: the ``argparse.Namespace`` holding all options.  The five
        switches default to ``False``, every other option to ``None``;
        ``geneclusterprots`` is opened for writing when given.
    """
    import argparse
    parser = argparse.ArgumentParser(prog='antiSMASH-Wrapper')
    parser.add_argument('--version', action='version', version='%(prog)s 0.01')
    parser.add_argument('--geneclustertypes',
                        help='Gene cluster types to search for; passed to '
                             'antismash.py as --geneclustertypes (e.g. 1)')
    parser.add_argument('--clusterblast', action='store_true')
    parser.add_argument('--eukaryotic', action='store_true')
    parser.add_argument('--fullhmm', action='store_true')
    parser.add_argument('--smcogs', action='store_true')
    parser.add_argument('--fullblast', action='store_true')

    parser.add_argument('--input', '-i', help='FASTA Sequence File')
    parser.add_argument('--glimmer_prediction', help='Glimmer Prediction File')

    parser.add_argument('--zip', help='output: all files as zip file')
    parser.add_argument('--html_file', help='output: the path to the index html file')
    parser.add_argument('--html_path', help='output: the path to the output html dir')
    parser.add_argument('--embl_path', help='output: the path to the embl output file')
    parser.add_argument('--geneclusterprots', help='output: Genecluster Fasta File',
                        type=argparse.FileType('w'))

    return parser.parse_args(argv)
| 148 | |
| 149 | |
if __name__ == '__main__':
    # Script entry point: parse the command line, then run the wrapper.
    args = arg_parse()
    anitSMASH(args)
| 153 |
