Mercurial > repos > fubar > blasttools_search_test
diff plotly_blast_tool/plotlyblast.xml @ 7:bb99f2c0e358 draft
Updated by regenerating with latest ToolFactory revision
author | fubar |
---|---|
date | Fri, 04 Aug 2023 01:52:13 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plotly_blast_tool/plotlyblast.xml Fri Aug 04 01:52:13 2023 +0000 @@ -0,0 +1,279 @@ +<tool name="plotlyblast" id="plotlyblast" version="3.0"> + <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay--> + <!--Created by toolfactory@galaxy.org at 04/08/2023 10:36:02 using the Galaxy Tool Factory.--> + <description>Plotly plot generator specialised for 25 column Galaxy blast search result tabular files</description> + <requirements> + <requirement version="1.5.3" type="package">pandas</requirement> + <requirement version="5.9.0" type="package">plotly</requirement> + <requirement version="0.2.1" type="package">python-kaleido</requirement> + </requirements> + <stdio> + <exit_code range="1:" level="fatal"/> + </stdio> + <version_command><![CDATA[echo "3.0"]]></version_command> + <command><![CDATA[python +$runme +--input_tab +$input_tab +--htmlout +$htmlout +--xcol +"$xcol" +--ycol +"$ycol" +--colourcol +"$colourcol" +--hovercol +"$hovercol" +--title +"$title" +--header +"$header" +--image_type +"$outputimagetype"]]></command> + <configfiles> + <configfile name="runme"><![CDATA[#raw + +import argparse +import shutil +import sys +import math +import plotly.express as px +import pandas as pd +# Ross Lazarus July 2023 +# based on various plotly tutorials +# Expects a blastn search result passed in as the first command line parameter. +parser = argparse.ArgumentParser() +a = parser.add_argument +a('--input_tab',default='') +a('--header',default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles') +a('--htmlout',default="test_run.html") +a('--xcol',default='') +a('--ycol',default='') +a('--colourcol',default='') +a('--hovercol',default='') +a('--title',default='test blast plot') +a('--image_type',default='short_html') +args = parser.parse_args() +df = pd.read_csv(args.input_tab, sep='\t') +NCOLS = df.columns.size +MAXLEN = 30 # tricky way to truncate long axis tickmarks +defaultcols = ['col%d' % (x+1) for x in range(NCOLS)] +if len(args.header.strip()) > 0: + newcols = args.header.split(',') + if len(newcols) == NCOLS: + df.columns = newcols + else: + sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (args.header, len(newcols), NCOLS)) + df.columns = defaultcols +else: + df.columns = defaultcols +df['evalue'] = [-math.log(x + 1e-308) for x in df['evalue']] # convert so large values reflect statistical surprise +if len(args.colourcol.strip()) == 0: + fig = px.scatter(df, x=args.xcol, y=args.ycol, hover_name=args.hovercol) +else: + fig = px.scatter(df, x=args.xcol, y=args.ycol, color=args.colourcol, hover_name=args.hovercol) +if args.title: + ftitle=dict(text=args.title, font=dict(size=40)) + fig.update_layout(title=ftitle) +for scatter in fig.data: + scatter['x'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['x']] + scatter['y'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['y']] + if len(args.colourcol.strip()) == 0: + sl = str(scatter['legendgroup']) + if len(sl) > MAXLEN: + scatter['legendgroup'] = sl[:MAXLEN] +if args.image_type == "short_html": + fig.write_html(args.htmlout, full_html=False, include_plotlyjs='cdn') +elif args.image_type == "long_html": + fig.write_html(args.htmlout) +elif args.image_type == "small_png": + ht = 768 + wdth = 1024 + fig.write_image('plotly.png', height=ht, width=wdth) + shutil.copyfile('plotly.png', args.htmlout) +else: + ht = 1200 + wdth = 1920 + fig.write_image('plotly.png', height=ht, width=wdth) + shutil.copyfile('plotly.png', args.htmlout) + + + +#end raw]]></configfile> + </configfiles> + <inputs> + <param name="input_tab" type="data" optional="false" label="Tabular input file to plot" help="" format="tabular" multiple="false"/> + <param name="xcol" type="text" value="qaccver" label="x axis for plot" help=""/> + <param name="ycol" type="text" value="bitscore" label="y axis for plot" help=""/> + <param name="colourcol" type="text" value="piden" label="column containing a groupable variable for colour. Default none." help="Adds a legend so choose wisely "/> + <param name="hovercol" type="text" value="qseq" label="columname for hover string" help=""/> + <param name="title" type="text" value="Qaccver by bitscore coloured by pident. Hover shows blast match." label="Title for the plot" help="Special characters will probably be escaped so do not use them"/> + <param name="header" type="text" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles" label="Use this comma delimited list of column header names for this tabular file. Default is None when col1...coln will be used" help="Default for Galaxy blast outputs with 25 columns. The column names supplied for xcol, ycol, hover and colour MUST match either the supplied list, or if none, col1...coln."/> + <param name="outputimagetype" type="select" label="Select the output format for this plot image" help="Small and large png are not interactive but best for many (__gt__10k) points. Stand-alone HTML includes 3MB of javascript. Short form HTML gets it the usual way so can be cut and paste into documents."> + <option value="short_html">Short HTML interactive - requires network connection to download 3MB javascript</option> + <option value="long_html">Long HTML for stand-alone viewing where network access to libraries is not available.</option> + <option value="large_png">Large (1920x1200) png image - not interactive so hover column ignored</option> + <option value="small_png">Small (1024x768) png image - not interactive so hover column ignored</option> + </param> + </inputs> + <outputs> + <data name="htmlout" format="html" label="Plotlyblast $title on $input_tab.element_identifier" hidden="false"> + <change_format> + <when input="outputimagetype" format="png" value="small_png"/> + <when input="outputimagetype" format="png" value="large_png"/> + </change_format> + </data> + </outputs> + <tests> + <test> + <output name="htmlout" value="htmlout_sample" compare="sim_size" delta="5000"/> + <param name="input_tab" value="input_tab_sample"/> + <param name="xcol" value="qaccver"/> + <param name="ycol" value="bitscore"/> + <param name="colourcol" value="piden"/> + <param name="hovercol" value="qseq"/> + <param name="title" value="Qaccver by bitscore coloured by pident. Hover shows blast match."/> + <param name="header" value="qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles"/> + <param name="outputimagetype" value="short_html"/> + </test> + </tests> + <help><![CDATA[ + +This is a specialised version of a generic tabular file plotter tool plotlytabular + +PNG image outputs are not interactive but best for very large numbers of data points. Hover column will be ignored. + +HTML interactive plots are best for a few thousand data points at most because + +the hover information becomes uncontrollable with very dense points. + +Using the shorter format HTML relies on internet access when viewed, and saves 3MB of javascript being embedded. + +The long format is useful if potentially viewed offline. + + + +.. class:: warningmark + +NOTE: Long strings in x and y tickmarks WILL BE TRUNCATED if they are too long - ".." is added to indicate truncation - otherwise some plots are squished. + + + +.. class:: warningmark + +NOTE: Blast evalues WILL BE TRANSFORMED using -log10(evalue), so they are scaled in a way that doesn't confuse plotly.express with the tiny values. + + + +---- + + + +The main reason to run this tool is to have an interactive hover text specified so it appears when hovering over each data point to supply useful information. + + + +Assumes you want a hover display for an interactive plot to be informative + + + +Column names are auto-generated as col1,...coln unless a comma separated list of column names is supplied as the header parameter. + + + +For example, using a Galaxy blastn output with 25 columns, the following comma delimited string supplied as the "header" parameter will match the names of each column. + + qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles + + + +When a header is supplied, the xcol and other column names must match one of those supplied column names. + +So for example, xcol = "qaccver" for the blastn header example rather than xcol = "col1" when no header is supplied. + + + +Relies on Plotly python code released under the MIT licence: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt + + + + + + + +------ + + +Script:: + + import argparse + import shutil + import sys + import math + import plotly.express as px + import pandas as pd + # Ross Lazarus July 2023 + # based on various plotly tutorials + # Expects a blastn search result passed in as the first command line parameter. + parser = argparse.ArgumentParser() + a = parser.add_argument + a('--input_tab',default='') + a('--header',default='qaccver,saccver,piden,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles') + a('--htmlout',default="test_run.html") + a('--xcol',default='') + a('--ycol',default='') + a('--colourcol',default='') + a('--hovercol',default='') + a('--title',default='test blast plot') + a('--image_type',default='short_html') + args = parser.parse_args() + df = pd.read_csv(args.input_tab, sep='\t') + NCOLS = df.columns.size + MAXLEN = 30 # tricky way to truncate long axis tickmarks + defaultcols = ['col%d' % (x+1) for x in range(NCOLS)] + if len(args.header.strip()) > 0: + newcols = args.header.split(',') + if len(newcols) == NCOLS: + df.columns = newcols + else: + sys.stderr.write('#### Supplied header %s has %d comma delimited header names - does not match the input tabular file %d columns - using col1,...coln' % (args.header, len(newcols), NCOLS)) + df.columns = defaultcols + else: + df.columns = defaultcols + df['evalue'] = [-math.log(x + 1e-308) for x in df['evalue']] # convert so large values reflect statistical surprise + if len(args.colourcol.strip()) == 0: + fig = px.scatter(df, x=args.xcol, y=args.ycol, hover_name=args.hovercol) + else: + fig = px.scatter(df, x=args.xcol, y=args.ycol, color=args.colourcol, hover_name=args.hovercol) + if args.title: + ftitle=dict(text=args.title, font=dict(size=40)) + fig.update_layout(title=ftitle) + for scatter in fig.data: + scatter['x'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['x']] + scatter['y'] = [str(x)[:MAXLEN] + '..' if len(str(x)) > MAXLEN else x for x in scatter['y']] + if len(args.colourcol.strip()) == 0: + sl = str(scatter['legendgroup']) + if len(sl) > MAXLEN: + scatter['legendgroup'] = sl[:MAXLEN] + if args.image_type == "short_html": + fig.write_html(args.htmlout, full_html=False, include_plotlyjs='cdn') + elif args.image_type == "long_html": + fig.write_html(args.htmlout) + elif args.image_type == "small_png": + ht = 768 + wdth = 1024 + fig.write_image('plotly.png', height=ht, width=wdth) + shutil.copyfile('plotly.png', args.htmlout) + else: + ht = 1200 + wdth = 1920 + fig.write_image('plotly.png', height=ht, width=wdth) + shutil.copyfile('plotly.png', args.htmlout) + +]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/bts573</citation> + </citations> +</tool> +