Mercurial > repos > fubar > plotly_blast_plot
view plotly_blast_tool/plotlyblast.xml @ 3:82bcfcb60f97 draft default tip
Updated by re-generating with latest ToolFactory so png or html outputs can be requested. Output labels are informative too.
author | fubar |
---|---|
date | Fri, 04 Aug 2023 01:55:07 +0000 |
parents | 47a7eeec4a19 |
children |
line wrap: on
line source
<tool name="plotlyblast" id="plotlyblast" version="3.0">
  <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
  <!--Created by toolfactory@galaxy.org at 04/08/2023 10:36:02 using the Galaxy Tool Factory.-->
  <!--
    Galaxy wrapper around a small plotly.express scatter-plot script (the "runme" configfile below).
    The script reads a tabular BLAST result, names its columns from the "header" parameter,
    rescales the evalue column to -log10 and writes either an HTML page or a PNG image to the
    single output dataset. The same script is reproduced in the help section for reference;
    keep the two copies in step when editing.
  -->
  <description>Plotly plot generator specialised for 25 column Galaxy blast search result tabular files</description>
  <requirements>
    <requirement version="1.5.3" type="package">pandas</requirement>
    <requirement version="5.9.0" type="package">plotly</requirement>
    <requirement version="0.2.1" type="package">python-kaleido</requirement>
  </requirements>
  <stdio>
    <exit_code range="1:" level="fatal"/>
  </stdio>
  <version_command><![CDATA[echo "3.0"]]></version_command>
  <!-- Paths are single-quoted so that spaces or shell metacharacters in them cannot split the command line. -->
  <command><![CDATA[python '$runme' --input_tab '$input_tab' --htmlout '$htmlout' --xcol "$xcol" --ycol "$ycol" --colourcol "$colourcol" --hovercol "$hovercol" --title "$title" --header "$header" --image_type "$outputimagetype"]]></command>
  <configfiles>
    <configfile name="runme"><![CDATA[#raw
import argparse
import shutil
import sys
import math
import plotly.express as px
import pandas as pd
# Ross Lazarus July 2023
# based on various plotly tutorials
# Expects a blastn search result (tabular, no header row) passed in with --input_tab.
parser = argparse.ArgumentParser()
a = parser.add_argument
a('--input_tab',default='')
a('--header',default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles')
a('--htmlout',default="test_run.html")
a('--xcol',default='')
a('--ycol',default='')
a('--colourcol',default='')
a('--hovercol',default='')
a('--title',default='test blast plot')
a('--image_type',default='short_html')
args = parser.parse_args()
# header=None: every row is data. Without it pandas takes the first hit as column
# names, and that row is lost when the names are replaced below.
df = pd.read_csv(args.input_tab, sep='\t', header=None)
NCOLS = df.columns.size
MAXLEN = 30 # longest axis tick label kept before truncation
defaultcols = ['col%d' % (x+1) for x in range(NCOLS)]
if len(args.header.strip()) > 0:
    newcols = args.header.split(',')
    if len(newcols) == NCOLS:
        df.columns = newcols
    else:
        # Wrong number of names: warn and fall back to col1..coln rather than fail here.
        sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (args.header, len(newcols), NCOLS))
        df.columns = defaultcols
else:
    df.columns = defaultcols
# Only rescale when an evalue column exists - it is absent whenever the
# col1..coln fallback or a custom header without that name is in use.
if 'evalue' in df.columns:
    # -log10 so large values reflect statistical surprise; the 1e-308 offset
    # keeps an evalue reported as 0 from raising a math domain error.
    df['evalue'] = [-math.log10(x + 1e-308) for x in df['evalue']]
if len(args.colourcol.strip()) == 0:
    fig = px.scatter(df, x=args.xcol, y=args.ycol, hover_name=args.hovercol)
else:
    fig = px.scatter(df, x=args.xcol, y=args.ycol, color=args.colourcol, hover_name=args.hovercol)
if args.title:
    ftitle=dict(text=args.title, font=dict(size=40))
    fig.update_layout(title=ftitle)
# tricky way to truncate long axis tickmarks: rewrite the plotted values themselves
for scatter in fig.data:
    scatter['x'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['x']]
    scatter['y'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['y']]
    # NOTE(review): this branch only runs when NO colour column was given - confirm
    # whether the test is inverted, since legend groups presumably only carry text
    # when a colour column is in use.
    if len(args.colourcol.strip()) == 0:
        sl = str(scatter['legendgroup'])
        if len(sl) > MAXLEN:
            scatter['legendgroup'] = sl[:MAXLEN]
if args.image_type == "short_html":
    # HTML fragment that loads plotly.js from the CDN when viewed
    fig.write_html(args.htmlout, full_html=False, include_plotlyjs='cdn')
elif args.image_type == "long_html":
    fig.write_html(args.htmlout)
elif args.image_type == "small_png":
    ht = 768
    wdth = 1024
    # written to a .png name first, then copied onto the output path
    fig.write_image('plotly.png', height=ht, width=wdth)
    shutil.copyfile('plotly.png', args.htmlout)
else:
    # any other value is treated as large_png
    ht = 1200
    wdth = 1920
    fig.write_image('plotly.png', height=ht, width=wdth)
    shutil.copyfile('plotly.png', args.htmlout)
#end raw]]></configfile>
  </configfiles>
  <inputs>
    <param name="input_tab" type="data" optional="false" label="Tabular input file to plot" help="" format="tabular" multiple="false"/>
    <param name="xcol" type="text" value="qaccver" label="x axis for plot" help=""/>
    <param name="ycol" type="text" value="bitscore" label="y axis for plot" help=""/>
    <param name="colourcol" type="text" value="piden" label="column containing a groupable variable for colour. Default none." help="Adds a legend so choose wisely "/>
    <param name="hovercol" type="text" value="qseq" label="columname for hover string" help=""/>
    <param name="title" type="text" value="Qaccver by bitscore coloured by pident. Hover shows blast match." label="Title for the plot" help="Special characters will probably be escaped so do not use them"/>
    <param name="header" type="text" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles" label="Use this comma delimited list of column header names for this tabular file. Default is None when col1...coln will be used" help="Default for Galaxy blast outputs with 25 columns. The column names supplied for xcol, ycol, hover and colour MUST match either the supplied list, or if none, col1...coln."/>
    <param name="outputimagetype" type="select" label="Select the output format for this plot image" help="Small and large png are not interactive but best for many (&gt;10k) points. Stand-alone HTML includes 3MB of javascript. Short form HTML gets it the usual way so can be cut and paste into documents.">
      <option value="short_html">Short HTML interactive - requires network connection to download 3MB javascript</option>
      <option value="long_html">Long HTML for stand-alone viewing where network access to libraries is not available.</option>
      <option value="large_png">Large (1920x1200) png image - not interactive so hover column ignored</option>
      <option value="small_png">Small (1024x768) png image - not interactive so hover column ignored</option>
    </param>
  </inputs>
  <outputs>
    <!-- One output dataset: html by default, switched to png when a png image type is selected. -->
    <data name="htmlout" format="html" label="Plotlyblast $title on $input_tab.element_identifier" hidden="false">
      <change_format>
        <when input="outputimagetype" format="png" value="small_png"/>
        <when input="outputimagetype" format="png" value="large_png"/>
      </change_format>
    </data>
  </outputs>
  <tests>
    <test>
      <output name="htmlout" value="htmlout_sample" compare="sim_size" delta="5000"/>
      <param name="input_tab" value="input_tab_sample"/>
      <param name="xcol" value="qaccver"/>
      <param name="ycol" value="bitscore"/>
      <param name="colourcol" value="piden"/>
      <param name="hovercol" value="qseq"/>
      <param name="title" value="Qaccver by bitscore coloured by pident. Hover shows blast match."/>
      <param name="header" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles"/>
      <param name="outputimagetype" value="short_html"/>
    </test>
  </tests>
  <help><![CDATA[

This is a specialised version of a generic tabular file plotter tool plotlytabular

PNG image outputs are not interactive but best for very large numbers of data points. Hover column will be ignored.

HTML interactive plots are best for a few thousand data points at most because
the hover information becomes uncontrollable with very dense points.

Using the shorter format HTML relies on internet access when viewed, and saves 3MB of javascript being embedded.
The long format is useful if potentially viewed offline.

.. class:: warningmark

NOTE: Long strings in x and y tickmarks WILL BE TRUNCATED if they are too long - ".." is added to indicate truncation - otherwise some plots are squished.

.. class:: warningmark

NOTE: Blast evalues WILL BE TRANSFORMED using -log10(evalue), so they are scaled in a way that doesn't confuse plotly.express with the tiny values.

----

The main reason to run this tool is to have an interactive hover text specified so it appears when hovering over each data point to supply useful information.

Assumes you want a hover display for an interactive plot to be informative

Column names are auto-generated as col1,...coln unless a comma separated list of column names is supplied as the header parameter.

For example, using a Galaxy blastn output with 25 columns, the following comma delimited string supplied as the "header" parameter will match the names of each column.

qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles

When a header is supplied, the xcol and other column names must match one of those supplied column names.
So for example, xcol = "qaccver" for the blastn header example rather than xcol = "col1" when no header is supplied.

Relies on Plotly python code released under the MIT licence: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt

------

Script::

    import argparse
    import shutil
    import sys
    import math
    import plotly.express as px
    import pandas as pd
    # Ross Lazarus July 2023
    # based on various plotly tutorials
    # Expects a blastn search result (tabular, no header row) passed in with --input_tab.
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a('--input_tab',default='')
    a('--header',default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles')
    a('--htmlout',default="test_run.html")
    a('--xcol',default='')
    a('--ycol',default='')
    a('--colourcol',default='')
    a('--hovercol',default='')
    a('--title',default='test blast plot')
    a('--image_type',default='short_html')
    args = parser.parse_args()
    df = pd.read_csv(args.input_tab, sep='\t', header=None)
    NCOLS = df.columns.size
    MAXLEN = 30 # longest axis tick label kept before truncation
    defaultcols = ['col%d' % (x+1) for x in range(NCOLS)]
    if len(args.header.strip()) > 0:
        newcols = args.header.split(',')
        if len(newcols) == NCOLS:
            df.columns = newcols
        else:
            sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (args.header, len(newcols), NCOLS))
            df.columns = defaultcols
    else:
        df.columns = defaultcols
    if 'evalue' in df.columns:
        # convert so large values reflect statistical surprise
        df['evalue'] = [-math.log10(x + 1e-308) for x in df['evalue']]
    if len(args.colourcol.strip()) == 0:
        fig = px.scatter(df, x=args.xcol, y=args.ycol, hover_name=args.hovercol)
    else:
        fig = px.scatter(df, x=args.xcol, y=args.ycol, color=args.colourcol, hover_name=args.hovercol)
    if args.title:
        ftitle=dict(text=args.title, font=dict(size=40))
        fig.update_layout(title=ftitle)
    # tricky way to truncate long axis tickmarks
    for scatter in fig.data:
        scatter['x'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['x']]
        scatter['y'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['y']]
        if len(args.colourcol.strip()) == 0:
            sl = str(scatter['legendgroup'])
            if len(sl) > MAXLEN:
                scatter['legendgroup'] = sl[:MAXLEN]
    if args.image_type == "short_html":
        fig.write_html(args.htmlout, full_html=False, include_plotlyjs='cdn')
    elif args.image_type == "long_html":
        fig.write_html(args.htmlout)
    elif args.image_type == "small_png":
        ht = 768
        wdth = 1024
        fig.write_image('plotly.png', height=ht, width=wdth)
        shutil.copyfile('plotly.png', args.htmlout)
    else:
        ht = 1200
        wdth = 1920
        fig.write_image('plotly.png', height=ht, width=wdth)
        shutil.copyfile('plotly.png', args.htmlout)

]]></help>
  <citations>
    <citation type="doi">10.1093/bioinformatics/bts573</citation>
  </citations>
</tool>