comparison phylostatistics/phytab_LB_pruner.py @ 0:5b9a38ec4a39 draft default tip

First commit of old repositories
author osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
date Tue, 11 Mar 2014 12:19:13 -0700
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5b9a38ec4a39
1 import os
2 import optparse
3 import subprocess
4 from multiprocessing import Pool
5
# Working directory holding the per-record files; main() overwrites this with
# "<--directory option>/data" before any file is written.
directory = ""
# Name of the combined output file that main() writes inside ``directory``.
results = "results.data"
# Suffix appended to the per-record file names (empty string: no suffix).
extension = ""
# Suffix LB_pruner() appends to a file name to form the perl script's third
# argument; main() later collects every file in ``directory`` ending with it.
aligned_extension = ".tab"
# Not referenced in the visible part of this file; kept for compatibility.
datatype = ""
# Value of the -m/--mult option, handed to LB_prunerG.pl by LB_pruner().
# main() overwrites it; it is defined here so LB_pruner() does not hit a
# NameError when it is reached before main() has run.
indata = ""

# Directory prefix of LB_prunerG.pl (used as ``perlpath + 'LB_prunerG.pl'``,
# so the trailing slash is required).
perlpath = "/home/galaxy-dist/tools/osiris/tree-manipulation/"
13
def unescape(string):
    """Return *string* with every ``__xx__`` escape token decoded.

    Each token in the table below is replaced by the literal character it
    stands for, e.g. ``__gt__`` becomes ``>`` and ``__tc__`` becomes a tab.
    Text that contains none of the tokens is returned unchanged.
    """
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        "'": '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
        '@': '__at__',
        '\n': '__cn__',
        '\r': '__cr__',
        '\t': '__tc__',
        '#': '__pd__',
    }

    # dict.items() exists on both Python 2 and 3; the former .iteritems()
    # call raised AttributeError on Python 3.
    for key, value in mapped_chars.items():
        string = string.replace(value, key)

    return string
35
36
def isTabular(file):
    """Return False if any line of *file* starts with '>', otherwise True.

    file -- path of the file to inspect.  An empty file counts as tabular.
    """
    with open(file) as handle:
        # any() stops reading at the first line that begins with '>'.
        has_header_line = any(row.startswith('>') for row in handle)
    return not has_header_line
43
44 #def toData(text, name):
45 # name = name.replace("fasta", "") #file name has fasta when fasta file called
46 # text = name.replace(".fs.tre", "") + "\t" + text.replace(" " , "")
47 # return text
48
49
def toData(text, name):
    """Return only the tab-containing lines of *text*, newline-terminated.

    text -- a string holding zero or more '\\n'-separated lines (main()
            passes the full contents of one ``.tab`` file).
    name -- unused; kept so existing two-argument callers keep working.

    Lines without a tab character are dropped.  In the lines that are kept,
    every occurrence of ``./data/`` is removed and a single newline is
    appended.  Returns the empty string when no line contains a tab.
    """
    kept = [
        line.replace("./data/", "")
        for line in text.split('\n')
        if '\t' in line
    ]
    # Build the result in one pass instead of repeated string concatenation.
    return "".join(line + "\n" for line in kept)
58
def LB_pruner(input):
    """Run LB_prunerG.pl on one file of the working directory and wait.

    input -- file name relative to the module-level ``directory``.

    The command executed is::

        perl <perlpath>LB_prunerG.pl <file> <indata> <file><aligned_extension>

    where ``perlpath``, ``indata`` and ``aligned_extension`` are module-level
    globals, so they must be set in the calling process before this runs.

    Returns the exit status of the perl process (conventionally 0 on
    success).  The status used to be discarded, which left callers unable to
    tell whether the script had failed.
    """
    file_name = directory + os.sep + input
    command = [
        'perl',
        perlpath + 'LB_prunerG.pl',
        file_name,
        indata,
        file_name + aligned_extension,
    ]
    # subprocess.call() blocks until the child exits and returns its status.
    return subprocess.call(command)
63
class Sequence:
    """One whitespace-delimited record: a name followed by a tree string."""

    def __init__(self, string):
        # Only the first two whitespace-separated fields are used; a line
        # with fewer than two fields raises IndexError.  The raw input is
        # kept unchanged in ``self.string``.
        fields = string.split()
        self.name, self.tree = fields[0], fields[1]
        self.string = string

    def printFASTA(self):
        """Return the tree field followed by a newline (nothing is printed)."""
        return ''.join((self.tree, '\n'))
73
def saveMulti(tabFile):
    """Split a tabular file into one file per record name.

    tabFile -- path of the input file; each non-blank line must hold at least
               two whitespace-separated fields (name, then value).

    For every record the second field plus a newline is appended to
    ``<directory>/<name><extension>``, so records sharing a name accumulate
    in the same file.  Blank lines are skipped.  A non-blank line with fewer
    than two fields still raises IndexError from Sequence().
    """
    with open(tabFile) as f:
        for line in f:
            if not line.strip():
                # A blank line (e.g. a trailing empty line) carries no record
                # and would otherwise make Sequence() raise IndexError.
                continue
            seq = Sequence(line)
            target = directory + os.sep + seq.name + extension
            # Append mode: several input lines may map to the same target.
            with open(target, "a") as p:
                p.write(seq.printFASTA())
80
def saveSingle(fastaFile):
    """Append the contents of *fastaFile* to ``<directory>/fasta<extension>``.

    fastaFile -- path of the input file to copy.

    The destination is opened once in append mode rather than once per input
    line.  As before, an empty input file creates no destination file.
    """
    target = directory + os.sep + "fasta" + extension
    with open(fastaFile) as source:
        first = source.readline()
        if not first:
            # Nothing to copy; do not create an empty destination file.
            return
        with open(target, "a") as p:
            p.write(first)
            for line in source:
                p.write(line)
86
def _init_worker(work_dir, mult):
    """Pool initializer: set the globals LB_pruner() reads in a worker."""
    global directory
    global indata
    directory = work_dir
    indata = mult


def main():
    """Command-line entry point.

    Steps:
      1. Parse -d/--directory, -i/--in and -m/--mult.
      2. Create ``<directory option>/data`` (os.mkdir raises OSError if it
         already exists).
      3. Split the input into that directory with saveMulti() when
         isTabular() is true, otherwise copy it with saveSingle().
      4. Run LB_pruner() on every file in the directory using a process pool.
      5. Concatenate toData() of every ``aligned_extension`` file into
         ``<directory>/<results>``.

    Exits through ``parser.error`` (usage message on stderr) when -i or -m is
    missing.
    """
    global directory
    global indata

    usage = """%prog [options]
options (listed below) default to 'None' if omitted
"""
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-d', '--directory',
        metavar="PATH",
        dest='path',
        default='.',
        help='Path to working directory.')

    parser.add_option(
        '-i', '--in',
        dest='input',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input data.')

    parser.add_option(
        '-m', '--mult',
        dest='datatype',
        action='store',
        type='string',
        help='Multiplier')

    options, args = parser.parse_args()

    # Both values are passed to unescape(), which cannot handle None; report
    # a missing option as a usage error instead of an AttributeError.
    if options.input is None:
        parser.error("an input file is required (-i/--in)")
    if options.datatype is None:
        parser.error("a multiplier is required (-m/--mult)")

    inputFile = unescape(options.input)
    directory = unescape(options.path) + os.sep + "data"
    indata = unescape(options.datatype)

    os.mkdir(directory)

    if isTabular(inputFile):
        saveMulti(inputFile)
    else:
        saveSingle(inputFile)

    list_of_files = [
        name for name in os.listdir(directory)
        if name.lower().endswith(extension)
    ]
    # The initializer re-publishes directory/indata inside each worker, so
    # LB_pruner() also works when workers do not inherit this process's
    # globals (multiprocessing start methods other than fork).
    pool = Pool(initializer=_init_worker, initargs=(directory, indata))
    try:
        pool.map(LB_pruner, list_of_files)
    finally:
        # Release the worker processes instead of leaving them to exit-time
        # cleanup.
        pool.close()
        pool.join()

    # Sorted so the combined output does not depend on os.listdir() order.
    result = sorted(
        name for name in os.listdir(directory)
        if name.lower().endswith(aligned_extension)
    )
    with open(directory + os.sep + results, "a") as f:
        for name in result:
            with open(directory + os.sep + name, "r") as r:
                f.write(toData(r.read(), name))


if __name__ == '__main__':
    main()
142