comparison alignment/phytab_mview.py @ 0:5b9a38ec4a39 draft default tip

First commit of old repositories
author osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
date Tue, 11 Mar 2014 12:19:13 -0700
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5b9a38ec4a39
1 #!/usr/bin/env python
2 ## usage: ./phytab_mview.py <phytabinput> <dna|protein> <output_html> <extra_files_path>
3 ## splits up an aligned phytab file containing multiple genes into
4 ## individual files to run mview
5
6 import sys, os, os.path, tempfile, shutil, re, shlex, subprocess
7 import optparse
8 from multiprocessing import Pool
9
# Module-level settings and HTML fragments used by the functions in this file.

directory = ""
extension = ".fs"  # suffix given to each per-gene FASTA file

# Opening part of the index page: document head, title banner and the start
# of the link table (header row included). The lines are joined with newlines
# and there is no trailing newline after the final "</tr>".
html_header = "\n".join([
    '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">',
    '<HTML>',
    '<HEAD>',
    '<TITLE></TITLE>',
    '</HEAD>',
    "<BODY BGCOLOR='white' TEXT='black' LINK='blue' ALINK='red' VLINK='purple'>",
    '<H1>PHYTAB MVIEW ALIGNMENT VIEWER</H1>',
    '<PRE>Select from below to view aligned sequence as HTML (left) or FASTA (right) in browser.',
    '</PRE>',
    '<table border="1" bordercolor="#000000" style="background-color:#FFFFFF" width="300" cellpadding="3" cellspacing="0">',
    '<tr>',
    '<td>mview HTML</td>',
    '<!--<td>FASTA</td>-->',
    '</tr>',
])

# Closing part of the index page. The leading empty element makes the
# fragment start with a newline.
html_close = "\n".join([
    '',
    '<P><SMALL><A HREF="http://bio-mview.sourceforge.net">MView</A> </SMALL><BR>',
    '</BODY>',
    '</HTML>',
])
32
33 #define some functions to call in 'main':
34 # first, sanitize problematic characters
def unescape(string):
    """Replace ``__xx__`` placeholder tokens with the characters they encode.

    Every occurrence of a known placeholder (for example ``__gt__``) in
    *string* is replaced by its literal character (``>``). Text without
    placeholders is returned unchanged.
    NOTE(review): the tokens look like the ones Galaxy's parameter sanitizer
    inserts -- confirm against the tool wrapper.

    Args:
        string: Text that may contain placeholder tokens.

    Returns:
        The text with all known placeholders expanded.
    """
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        "'": '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
        '@': '__at__',
        '\n': '__cn__',
        '\r': '__cr__',
        '\t': '__tc__',
        '#': '__pd__',
    }

    # items() is available on both Python 2 and Python 3, whereas
    # iteritems() only exists on Python 2.
    for key, value in mapped_chars.items():
        string = string.replace(value, key)

    return string
56 # next, define tabular --> fasta conversion
class Sequence:
    """One whitespace-delimited table row that can be rendered as FASTA.

    Attributes set by the constructor:
        string   -- the input line exactly as given
        species  -- first field
        family   -- second field
        name     -- third field
        header   -- every field except the last, joined by single spaces
        sequence -- last field
    """

    def __init__(self, string):
        """Split *string* on whitespace and store the resulting fields.

        Raises IndexError when the line has fewer than three fields.
        """
        columns = string.split()
        self.string = string
        self.species, self.family, self.name = columns[0], columns[1], columns[2]
        self.sequence = columns[-1]
        self.header = ' '.join(columns[:-1])

    def printFASTA(self):
        """Return the record as a two-line FASTA entry ('> header', sequence)."""
        return '> %s\n%s\n' % (self.header, self.sequence)
69
70 # then define function to apply preceding conversion method to all genes
71 # (creates separate file for each gene)
def saveMulti(tabFile):
    """Split a multi-gene table into one FASTA file per gene family.

    Each non-blank line of *tabFile* is parsed with ``Sequence`` and its
    FASTA record is appended to the file named ``<family><extension>``
    (a relative path, so it is resolved against the working directory).

    Args:
        tabFile: Path of the tabular input file.
    """
    with open(tabFile) as f:
        for line in f:
            # A blank or whitespace-only line has no fields and would make
            # Sequence raise IndexError; skip it instead of aborting the run.
            if not line.strip():
                continue
            seq = Sequence(line)
            # Append mode: rows of the same family are spread over the input,
            # so each family file is extended one record at a time.
            with open(seq.family + extension, "a") as p:
                p.write(seq.printFASTA())
78
79 #subroutine to write main HTML output containing valid urls to mview htmls
def resultsto_output_html(html_mainoutput, basepath):
    """Write the index page that links to every HTML file found in *basepath*.

    The page consists of ``html_header``, one table row per file (sorted by
    name), the closing table tag and ``html_close``.

    Args:
        html_mainoutput: Path of the index HTML file to create or overwrite.
        basepath: Directory whose ``*.html`` files are listed.
    """
    # Match on the suffix rather than on a substring, so that a file whose
    # name merely contains "html" is not linked as an alignment page.
    htmllist = sorted(f for f in os.listdir(basepath) if f.endswith('.html'))

    # The context manager closes the file even if a write fails.
    with open(html_mainoutput, 'w') as html:
        html.write(html_header)
        for f in htmllist:
            # Links are bare file names; each row is properly closed.
            html.write('<tr><td><a href="' + f + '">' + f + '</a></td></tr>\n')
        # html_header opens the table but html_close does not close it.
        html.write('</table>')
        html.write(html_close)
91
def main():
    """Run mview on every gene of the input table and write an HTML index.

    Positional command-line arguments (params for the galaxy wrapper):
        1. $input   -- tabular input file
        2. $dna     -- 'dna' adds mview's -DNA flag; any other value omits it
        3. $output  -- path of the main HTML output
        4. "$output.extra_files_path" -- directory for the per-gene HTML files
    """
    if len(sys.argv) < 5:
        sys.exit('usage: phytab_mview.py <phytabinput> <dna|protein> '
                 '<output_html> <extra_files_path>')

    inputphytabfile = sys.argv[1]
    dnaorprotein = sys.argv[2]
    output = sys.argv[3]
    extra_files_path = sys.argv[4]

    inputFile = unescape(inputphytabfile)
    # Make the per-gene FASTA files in the working directory.
    saveMulti(inputFile)

    # Make the directory that holds the mview HTML files if it is missing.
    if not os.path.isdir(extra_files_path):
        os.makedirs(extra_files_path)

    # Build the mview command once; only the -DNA flag depends on the input.
    mview_cmd = ['mview', '-in', 'pearson']
    # Compare by value: 'is' tests object identity, which is not reliably
    # true for a string taken from sys.argv, so -DNA was never applied.
    if dnaorprotein == 'dna':
        mview_cmd.append('-DNA')
    mview_cmd += ['-bold', '-coloring', 'group', '-html', 'head']

    # Only pick up the FASTA files written above (names ending in the FASTA
    # extension), not every file whose name happens to contain "fs".
    fastafiles = sorted(
        f for f in os.listdir(os.getcwd()) if f.endswith(extension))

    # Execute mview on each FASTA, storing the result as <gene_aln>.html.
    for gene_aln in fastafiles:
        result_path = os.path.join(extra_files_path, gene_aln + '.html')
        proc = subprocess.Popen(mview_cmd + [gene_aln],
                                stdout=subprocess.PIPE)
        # communicate() reads stdout while waiting for the process; calling
        # wait() first can deadlock once the pipe buffer fills up.
        out = proc.communicate()[0]

        with open(result_path, 'wb') as fileout:
            fileout.write(out)

    # Write the main HTML output linking to the per-gene pages.
    resultsto_output_html(output, extra_files_path)


if __name__ == '__main__':
    main()
138