7
|
1 <tool name="plotlyblast" id="plotlyblast" version="3.0">
|
|
2 <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
|
|
3 <!--Created by toolfactory@galaxy.org at 04/08/2023 10:36:02 using the Galaxy Tool Factory.-->
|
|
4 <description>Plotly plot generator specialised for 25 column Galaxy blast search result tabular files</description>
|
|
5 <requirements>
|
|
6 <requirement version="1.5.3" type="package">pandas</requirement>
|
|
7 <requirement version="5.9.0" type="package">plotly</requirement>
|
|
8 <requirement version="0.2.1" type="package">python-kaleido</requirement>
|
|
9 </requirements>
|
|
10 <stdio>
|
|
11 <exit_code range="1:" level="fatal"/>
|
|
12 </stdio>
|
|
13 <version_command><![CDATA[echo "3.0"]]></version_command>
|
|
14 <command><![CDATA[python
|
|
15 $runme
|
|
16 --input_tab
|
|
17 $input_tab
|
|
18 --htmlout
|
|
19 $htmlout
|
|
20 --xcol
|
|
21 "$xcol"
|
|
22 --ycol
|
|
23 "$ycol"
|
|
24 --colourcol
|
|
25 "$colourcol"
|
|
26 --hovercol
|
|
27 "$hovercol"
|
|
28 --title
|
|
29 "$title"
|
|
30 --header
|
|
31 "$header"
|
|
32 --image_type
|
|
33 "$outputimagetype"]]></command>
|
|
34 <configfiles>
|
|
35 <configfile name="runme"><![CDATA[#raw
|
|
36
|
|
37 import argparse
|
|
38 import shutil
|
|
39 import sys
|
|
40 import math
|
|
41 import plotly.express as px
|
|
42 import pandas as pd
|
|
43 # Ross Lazarus July 2023
|
|
44 # based on various plotly tutorials
|
|
45 # Expects a blastn tabular search result passed in with the --input_tab command line parameter.
|
|
# Plot driver for tabular BLAST hits: read the table, name the columns,
# rescale evalues, draw a plotly express scatter and write HTML or PNG.

MAXLEN = 30  # tricky way to truncate long axis tickmarks
EVALUE_FLOOR = 1e-308  # added to every evalue so an evalue of exactly 0 can still be logged
SMALL_PNG = (1024, 768)  # (width, height) in pixels
LARGE_PNG = (1920, 1200)  # (width, height) in pixels; also used for any unrecognised image type
DEFAULT_HEADER = 'qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles'


def parse_args(argv=None):
    """Parse the command line (sys.argv when argv is None) and return the argparse namespace."""
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a('--input_tab', default='')
    a('--header', default=DEFAULT_HEADER)
    a('--htmlout', default="test_run.html")
    a('--xcol', default='')
    a('--ycol', default='')
    a('--colourcol', default='')
    a('--hovercol', default='')
    a('--title', default='test blast plot')
    a('--image_type', default='short_html')
    return parser.parse_args(argv)


def read_table(source):
    """Read a tab separated table that has no header row.

    header=None keeps the first line as data. Without it pandas treats the
    first hit as column names, which are then overwritten, so that hit was
    silently dropped from the plot.
    """
    return pd.read_csv(source, sep='\t', header=None)


def assign_columns(df, header):
    """Name the columns of df from a comma delimited header string.

    Falls back to col1,...coln when the header is empty or its length does
    not match the table; the mismatch is reported on stderr. Returns df.
    """
    ncols = df.columns.size
    defaultcols = ['col%d' % (i + 1) for i in range(ncols)]
    if len(header.strip()) > 0:
        newcols = [name.strip() for name in header.split(',')]
        if len(newcols) == ncols:
            df.columns = newcols
        else:
            sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (header, len(newcols), ncols))
            df.columns = defaultcols
    else:
        df.columns = defaultcols
    return df


def transform_evalue(df):
    """Replace the evalue column with -ln(evalue) so large values reflect statistical surprise.

    Does nothing when there is no column called evalue, which is the case
    whenever the col1,...coln fallback names are in use. Returns df.
    """
    if 'evalue' in df.columns:
        df['evalue'] = [-math.log(x + EVALUE_FLOOR) for x in df['evalue']]
    return df


def truncate_label(value, maxlen=MAXLEN):
    """Return value unchanged unless its string form is longer than maxlen, then cut it and add '..'."""
    text = str(value)
    if len(text) > maxlen:
        return text[:maxlen] + '..'
    return value


def check_columns(df, required, optional):
    """Exit with a readable message when a requested column is not in df.

    required and optional are lists of (option name, column name) pairs; an
    empty optional column name means "not requested" and is skipped.
    """
    missing = ['--%s "%s"' % (opt, name) for opt, name in required if name not in df.columns]
    missing += ['--%s "%s"' % (opt, name) for opt, name in optional if name and name not in df.columns]
    if missing:
        sys.exit('#### Column name(s) not found in the table: %s. Available columns are: %s' % (', '.join(missing), ','.join(str(c) for c in df.columns)))


def main(argv=None):
    """Run the whole plot: parse arguments, load and prepare the table, draw and save the figure."""
    args = parse_args(argv)
    df = transform_evalue(assign_columns(read_table(args.input_tab), args.header))
    xcol = args.xcol.strip()
    ycol = args.ycol.strip()
    colourcol = args.colourcol.strip()
    hovercol = args.hovercol.strip()
    check_columns(df, [('xcol', xcol), ('ycol', ycol)], [('colourcol', colourcol), ('hovercol', hovercol)])
    # An empty colour or hover column means "none": plotly express wants None, not ''.
    fig = px.scatter(df, x=xcol, y=ycol, color=colourcol or None, hover_name=hovercol or None)
    if args.title:
        fig.update_layout(title=dict(text=args.title, font=dict(size=40)))
    for scatter in fig.data:
        scatter['x'] = [truncate_label(v) for v in scatter['x']]
        scatter['y'] = [truncate_label(v) for v in scatter['y']]
        # NOTE(review): kept as in the original - the legend group is only shortened when
        # no colour column was given. Confirm whether the opposite condition was intended.
        if not colourcol:
            sl = str(scatter['legendgroup'])
            if len(sl) > MAXLEN:
                scatter['legendgroup'] = sl[:MAXLEN]
    if args.image_type == "short_html":
        fig.write_html(args.htmlout, full_html=False, include_plotlyjs='cdn')
    elif args.image_type == "long_html":
        fig.write_html(args.htmlout)
    else:
        wdth, ht = SMALL_PNG if args.image_type == "small_png" else LARGE_PNG
        # Written under a .png name first and then copied, as in the original script.
        fig.write_image('plotly.png', height=ht, width=wdth)
        shutil.copyfile('plotly.png', args.htmlout)


if __name__ == "__main__":
    main()
|
|
100
|
|
101
|
|
102
|
|
103 #end raw]]></configfile>
|
|
104 </configfiles>
|
|
105 <inputs>
|
|
106 <param name="input_tab" type="data" optional="false" label="Tabular input file to plot" help="" format="tabular" multiple="false"/>
|
|
107 <param name="xcol" type="text" value="qaccver" label="x axis for plot" help=""/>
|
|
108 <param name="ycol" type="text" value="bitscore" label="y axis for plot" help=""/>
|
|
109 <param name="colourcol" type="text" value="piden" label="Column containing a groupable variable for colour. Leave empty for no colouring." help="Adds a legend so choose wisely "/>
|
|
110 <param name="hovercol" type="text" value="qseq" label="Column name for hover string" help=""/>
|
|
111 <param name="title" type="text" value="Qaccver by bitscore coloured by pident. Hover shows blast match." label="Title for the plot" help="Special characters will probably be escaped so do not use them"/>
|
|
112 <param name="header" type="text" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles" label="Comma delimited list of column header names to use for this tabular file. Leave empty to use col1...coln" help="Default for Galaxy blast outputs with 25 columns. The column names supplied for xcol, ycol, hover and colour MUST match either the supplied list, or if none, col1...coln."/>
|
|
113 <param name="outputimagetype" type="select" label="Select the output format for this plot image" help="Small and large png are not interactive but best for many (more than 10k) points. Stand-alone HTML includes 3MB of javascript. Short form HTML downloads it from the network when viewed, so it can be cut and pasted into documents.">
|
|
114 <option value="short_html">Short HTML interactive - requires network connection to download 3MB javascript</option>
|
|
115 <option value="long_html">Long HTML for stand-alone viewing where network access to libraries is not available.</option>
|
|
116 <option value="large_png">Large (1920x1200) png image - not interactive so hover column ignored</option>
|
|
117 <option value="small_png">Small (1024x768) png image - not interactive so hover column ignored</option>
|
|
118 </param>
|
|
119 </inputs>
|
|
120 <outputs>
|
|
121 <data name="htmlout" format="html" label="Plotlyblast $title on $input_tab.element_identifier" hidden="false">
|
|
122 <change_format>
|
|
123 <when input="outputimagetype" format="png" value="small_png"/>
|
|
124 <when input="outputimagetype" format="png" value="large_png"/>
|
|
125 </change_format>
|
|
126 </data>
|
|
127 </outputs>
|
|
128 <tests>
|
|
129 <test>
|
|
130 <output name="htmlout" value="htmlout_sample" compare="sim_size" delta="5000"/>
|
|
131 <param name="input_tab" value="input_tab_sample"/>
|
|
132 <param name="xcol" value="qaccver"/>
|
|
133 <param name="ycol" value="bitscore"/>
|
|
134 <param name="colourcol" value="piden"/>
|
|
135 <param name="hovercol" value="qseq"/>
|
|
136 <param name="title" value="Qaccver by bitscore coloured by pident. Hover shows blast match."/>
|
|
137 <param name="header" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles"/>
|
|
138 <param name="outputimagetype" value="short_html"/>
|
|
139 </test>
|
|
140 </tests>
|
|
141 <help><![CDATA[
|
|
142
|
|
143 This is a specialised version of a generic tabular file plotter tool plotlytabular
|
|
144
|
|
145 PNG image outputs are not interactive but best for very large numbers of data points. Hover column will be ignored.
|
|
146
|
|
147 HTML interactive plots are best for a few thousand data points at most because
|
|
148
|
|
149 the hover information becomes uncontrollable with very dense points.
|
|
150
|
|
151 Using the shorter format HTML relies on internet access when viewed, and saves 3MB of javascript being embedded.
|
|
152
|
|
153 The long format is useful if potentially viewed offline.
|
|
154
|
|
155
|
|
156
|
|
157 .. class:: warningmark
|
|
158
|
|
159 NOTE: Long strings in x and y tickmarks WILL BE TRUNCATED if they are too long - ".." is added to indicate truncation - otherwise some plots are squished.
|
|
160
|
|
161
|
|
162
|
|
163 .. class:: warningmark
|
|
164
|
|
165 NOTE: Blast evalues WILL BE TRANSFORMED using -log(evalue) (natural logarithm), so they are scaled in a way that doesn't confuse plotly.express with the tiny values.
|
|
166
|
|
167
|
|
168
|
|
169 ----
|
|
170
|
|
171
|
|
172
|
|
173 The main reason to run this tool is to have an interactive hover text specified so it appears when hovering over each data point to supply useful information.
|
|
174
|
|
175
|
|
176
|
|
177 Assumes you want a hover display for an interactive plot to be informative
|
|
178
|
|
179
|
|
180
|
|
181 Column names are auto-generated as col1,...coln unless a comma separated list of column names is supplied as the header parameter.
|
|
182
|
|
183
|
|
184
|
|
185 For example, using a Galaxy blastn output with 25 columns, the following comma delimited string supplied as the "header" parameter will match the names of each column.
|
|
186
|
|
187 qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles
|
|
188
|
|
189
|
|
190
|
|
191 When a header is supplied, the xcol and other column names must match one of those supplied column names.
|
|
192
|
|
193 So for example, xcol = "qaccver" for the blastn header example rather than xcol = "col1" when no header is supplied.
|
|
194
|
|
195
|
|
196
|
|
197 Relies on Plotly python code released under the MIT licence: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt
|
|
198
|
|
199
|
|
200
|
|
201
|
|
202
|
|
203
|
|
204
|
|
205 ------
|
|
206
|
|
207
|
|
208 Script::
|
|
209
|
|
210 import argparse
|
|
211 import shutil
|
|
212 import sys
|
|
213 import math
|
|
214 import plotly.express as px
|
|
215 import pandas as pd
|
|
216 # Ross Lazarus July 2023
|
|
217 # based on various plotly tutorials
|
|
218 # Expects a blastn search result passed in as the first command line parameter.
|
|
219 parser = argparse.ArgumentParser()
|
|
220 a = parser.add_argument
|
|
221 a('--input_tab',default='')
|
|
222 a('--header',default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles')
|
|
223 a('--htmlout',default="test_run.html")
|
|
224 a('--xcol',default='')
|
|
225 a('--ycol',default='')
|
|
226 a('--colourcol',default='')
|
|
227 a('--hovercol',default='')
|
|
228 a('--title',default='test blast plot')
|
|
229 a('--image_type',default='short_html')
|
|
230 args = parser.parse_args()
|
|
231 df = pd.read_csv(args.input_tab, sep='\t')
|
|
232 NCOLS = df.columns.size
|
|
233 MAXLEN = 30 # tricky way to truncate long axis tickmarks
|
|
234 defaultcols = ['col%d' % (x+1) for x in range(NCOLS)]
|
|
235 if len(args.header.strip()) > 0:
|
|
236 newcols = args.header.split(',')
|
|
237 if len(newcols) == NCOLS:
|
|
238 df.columns = newcols
|
|
239 else:
|
|
240 sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (args.header, len(newcols), NCOLS))
|
|
241 df.columns = defaultcols
|
|
242 else:
|
|
243 df.columns = defaultcols
|
|
244 df['evalue'] = [-math.log(x + 1e-308) for x in df['evalue']] # convert so large values reflect statistical surprise
|
|
245 if len(args.colourcol.strip()) == 0:
|
|
246 fig = px.scatter(df, x=args.xcol, y=args.ycol, hover_name=args.hovercol)
|
|
247 else:
|
|
248 fig = px.scatter(df, x=args.xcol, y=args.ycol, color=args.colourcol, hover_name=args.hovercol)
|
|
249 if args.title:
|
|
250 ftitle=dict(text=args.title, font=dict(size=40))
|
|
251 fig.update_layout(title=ftitle)
|
|
252 for scatter in fig.data:
|
|
253 scatter['x'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['x']]
|
|
254 scatter['y'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['y']]
|
|
255 if len(args.colourcol.strip()) == 0:
|
|
256 sl = str(scatter['legendgroup'])
|
|
257 if len(sl) > MAXLEN:
|
|
258 scatter['legendgroup'] = sl[:MAXLEN]
|
|
259 if args.image_type == "short_html":
|
|
260 fig.write_html(args.htmlout, full_html=False, include_plotlyjs='cdn')
|
|
261 elif args.image_type == "long_html":
|
|
262 fig.write_html(args.htmlout)
|
|
263 elif args.image_type == "small_png":
|
|
264 ht = 768
|
|
265 wdth = 1024
|
|
266 fig.write_image('plotly.png', height=ht, width=wdth)
|
|
267 shutil.copyfile('plotly.png', args.htmlout)
|
|
268 else:
|
|
269 ht = 1200
|
|
270 wdth = 1920
|
|
271 fig.write_image('plotly.png', height=ht, width=wdth)
|
|
272 shutil.copyfile('plotly.png', args.htmlout)
|
|
273
|
|
274 ]]></help>
|
|
275 <citations>
|
|
276 <citation type="doi">10.1093/bioinformatics/bts573</citation>
|
|
277 </citations>
|
|
278 </tool>
|
|
279
|