Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison alignment/phytab_aliscorecut.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
| author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
|---|---|
| date | Tue, 11 Mar 2014 12:19:13 -0700 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:5b9a38ec4a39 |
|---|---|
| 1 import os | |
| 2 import optparse | |
| 3 import subprocess | |
| 4 from multiprocessing import Pool | |
| 5 import shutil | |
| 6 | |
# Working directory (relative to the current directory) that holds the
# per-family FASTA files and everything the Perl scripts produce.
results_dir = "./data"
# Name of the combined output file written inside results_dir.
results = "results.data"
# Extension given to the per-family FASTA files.
fasta_extension = ".afa"
# File-name prefix used to pick up the ALICUT output files.
alicut_prefix = "ALICUT_"
familyList = []
# Directory containing the Aliscore / ALICUT Perl scripts.
galaxy_tool_dir = "/home/galaxy/bin/"
# Substrings that are replaced by placeholder tokens in FASTA headers
# (see Sequence.escapedHeader) and restored by unescapeHeader.
forbidden_chars = {
    "(": "__rb__",
    ")": "__lb__",
    ":": "__co__",
    ";": "__sc__",
    ",": "__cm__",
    "--": "__dd__",
    "*": "__st__",
    "|": "__pi__",
    " ": "__sp__",
}
| 24 | |
| 25 | |
def unescape(string):
    """Replace ``__xx__`` placeholder tokens with the characters they stand for.

    Every token listed in ``mapped_chars`` (for example ``__gt__`` or
    ``__at__``) that occurs in *string* is replaced by its literal character.
    Text without any placeholder is returned unchanged.

    :param string: text that may contain placeholder tokens.
    :return: the text with all known placeholders substituted.
    """
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        "'": '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
        '@': '__at__',
        '\n': '__cn__',
        '\r': '__cr__',
        '\t': '__tc__',
        '#': '__pd__'
    }

    # items() is available on both Python 2 and Python 3, whereas
    # iteritems() only exists on Python 2.
    for key, value in mapped_chars.items():
        string = string.replace(value, key)

    return string
| 47 | |
| 48 | |
def unpackData(families):
    """Split the input table into one FASTA file per family.

    Each non-blank line of *families* is parsed as a ``Sequence`` and appended
    as a FASTA record to ``<results_dir>/<family><fasta_extension>``.

    :param families: path of the tab-separated input file.
    """
    with open(families) as f:
        for line in f:
            # A blank line (e.g. a trailing empty line) has no fields, so
            # Sequence() would fail with IndexError; skip it instead.
            if not line.strip():
                continue
            seq = Sequence(line)
            # Append mode: records of the same family accumulate in one file.
            with open(results_dir + os.sep + seq.family + fasta_extension, "a") as p:
                p.write(seq.printFASTA())
| 55 | |
| 56 | |
class Sequence:
    """One record of the tab-separated input table.

    The first three fields are read as species, family and name; the last
    field is the sequence. All fields except the last, joined by single
    spaces, form the FASTA header.
    """

    def __init__(self, string):
        """Parse one input line.

        :param string: a tab-separated line with at least three fields.
        :raises IndexError: if the line has fewer than three fields.
        """
        # Drop the line terminator first; otherwise it stays attached to the
        # sequence and printFASTA() emits an extra blank line (or a stray
        # carriage return for CRLF input).
        lis = string.rstrip('\r\n').split('\t')
        self.species = lis[0]
        self.family = lis[1]
        self.name = lis[2]
        self.header = ' '.join(lis[:-1])
        self.sequence = lis[-1]
        # The raw, unmodified input line.
        self.string = string

    def escapedHeader(self):
        """Return the header with every ``forbidden_chars`` key replaced by its token."""
        string = self.header
        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        for key, value in forbidden_chars.items():
            string = string.replace(key, value)
        return string

    def printFASTA(self):
        """Return the record as FASTA text: ``>header``, newline, sequence, newline."""
        return '>' + self.escapedHeader() + '\n' + self.sequence + '\n'
| 75 | |
| 76 | |
def unescapeHeader(header):
    """Replace every ``forbidden_chars`` token in *header* by its original text.

    This reverses the substitution done by ``Sequence.escapedHeader``.

    :param header: header text containing placeholder tokens.
    :return: the header with the tokens substituted back.
    """
    string = header
    # items() works on Python 2 and 3; iteritems() is Python 2 only.
    for key, value in forbidden_chars.items():
        string = string.replace(value, key)
    return string
| 82 | |
| 83 | |
def toData(text):
    """Convert FASTA text back into tab-separated records.

    Lines containing ``>`` start a new record: the ``>`` is removed, the
    header is unescaped, a tab is appended and every space becomes a tab.
    All other lines are concatenated after it without separators. The first
    character of the assembled text (the newline that precedes the first
    record) is dropped.

    :param text: contents of a FASTA file.
    :return: one line per record, fields separated by tabs.
    """
    pieces = []
    for row in text.split('\n'):
        if '>' in row:
            record_head = unescapeHeader(row.replace('>', ""))
            row = ('\n' + record_head + '\t').replace(" ", "\t")
        pieces.append(row)
    # Skip the leading newline that was emitted before the first header.
    return ''.join(pieces)[1:]
| 93 | |
| 94 | |
def aliscore(input):
    """Run the Aliscore Perl script on one file in ``results_dir`` and wait for it.

    :param input: file name (relative to ``results_dir``) of the alignment.
    """
    alignment_path = os.sep.join([results_dir, input])
    command = [
        "perl",
        "-I", galaxy_tool_dir,
        galaxy_tool_dir + "Aliscore.02.pl",
        "-i", alignment_path,
    ]
    process = subprocess.Popen(command)
    process.wait()
| 100 | |
| 101 | |
def main():
    """Command-line entry point.

    Steps:
      1. Parse ``-i/--input`` and unescape the given path.
      2. Create ``results_dir`` and split the input into per-family FASTA
         files.
      3. Run Aliscore on every FASTA file in parallel.
      4. Copy the ALICUT script into ``results_dir`` and run it there.
      5. Convert every ``ALICUT_*`` FASTA file back to tab-separated records
         and append them to ``<results_dir>/<results>``.
    """
    usage = """%prog [options]
options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-i', '--input',
        dest='families',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input sequences.')

    options, args = parser.parse_args()

    # Without this check a missing -i ends in an AttributeError inside
    # unescape(None); report it as a normal usage error instead.
    if not options.families:
        parser.error("an input file is required (-i/--input)")

    families = unescape(options.families)

    os.mkdir(results_dir)

    unpackData(families)

    list_of_files = [name for name in os.listdir(results_dir)
                     if name.lower().endswith(fasta_extension)]

    pool = Pool()
    try:
        pool.map(aliscore, list_of_files)
    finally:
        # Release the worker processes even if one Aliscore run failed.
        pool.close()
        pool.join()

    alicut = "ALICUT_V2.0_modified.pl"
    shutil.copy(galaxy_tool_dir + alicut, results_dir + os.sep + alicut)
    # Run ALICUT inside results_dir through cwd= rather than chdir-ing the
    # whole process there and back, so this process's working directory
    # never changes.
    pop = subprocess.Popen(["perl", "./" + alicut], cwd=results_dir)
    pop.wait()

    result = [name for name in os.listdir(results_dir)
              if name.startswith(alicut_prefix)]
    with open(results_dir + os.sep + results, "a") as f:
        for name in result:
            if name.endswith(fasta_extension):
                with open(results_dir + os.sep + name, "r") as r:
                    f.write(toData(r.read()) + "\n")


if __name__ == '__main__':
    main()
