view rgToolFactory2.xml @ 25:9fe74bd23af2 draft

Uploaded
author fubar
date Mon, 02 Mar 2015 05:18:21 -0500
parents
children db35d39e1de9
line wrap: on
line source

<tool id="rgTF2" name="Tool Factory Two" version="1.17">
  <description>Scripts into tools</description>
   <!-- External packages this tool declares as requirements: ghostscript and graphicsmagick. -->
   <requirements>
     <requirement type="package" version="9.10">ghostscript</requirement>
     <requirement type="package" version="1.3.20">graphicsmagick</requirement>
   </requirements>
  <!-- Code hook file. Presumably it defines the find_packages function named in the dynamic_options of the interpreter selects in this file (verify against getlocalrpackages.py). -->
  <code file="getlocalrpackages.py"/>
  <command interpreter="python">
## Access control: the pasted script runs with the privileges of the Galaxy user,
## so only admins (or anyone on a development instance) get past this check.
#import os
#set dev_env = os.environ.get('GALAXY_DEVELOPMENT_ENVIRONMENT', '0') == '1'
## Galaxy supplies the admin_users config value as one comma separated string.
## Split it into exact addresses first, because a plain "in" test against the
## string is a substring match that would accept b@x.org when bob@x.org is an admin.
## An empty email is always refused so that an empty admin list cannot match it.
#set user_email = str($__user_email__).strip()
#set admin_list = str($__admin_users__).replace(' ', '').split(',')
#if not $dev_env and ( $user_email == '' or $user_email not in $admin_list ):
     rgToolFactory2.py --bad_user "$__user_email__"
#else:
    ## Options that are always passed: script file, interpreter, names and citations.
    rgToolFactory2.py --script_path "$runme" --interpreter "$interp.interpreter"
     --tool_name "$tool_name"  --user_email "$__user_email__" --citations "$citeme"
     --envshpath "$interp.envpath"

    ## Optional new history output written by the script.
    #if $make_TAB.value=="yes":
          --output_tab "$output1"
          --output_format "$output_format"
    #end if
    ## Optional generation of a toolshed archive for the new tool.
    #if $makeMode.make_Tool=="yes":
      --make_Tool "$makeMode.make_Tool"
      --tool_desc "$makeMode.tool_desc"
      --tool_version "$makeMode.tool_version"
      --new_tool "$new_tool"
      --help_text "$helpme"
      ## Dependencies are only included when an HTML report is also requested.
      #if $make_HTML.value=="yes":
          #if $makeMode.include_deps.value=="yes":
             --include_dependencies "yes"
          #end if
      #end if
    #end if
    ## Each additional parameter record is passed as one comma separated value.
    #if $additional_parameters != 'None':
      #if $edit_params.value == "yes":
         --edit_additional_parameters
      #end if
      #for i in $additional_parameters:
        --additional_parameters "$i.param_name,$i.param_value,$i.param_label,$i.param_help,$i.param_type"
      #end for
    #end if
    ## With an HTML report the outputs go to the report's files directory, otherwise to the working directory.
    #if $make_HTML.value=="yes":
      --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"
    #else:
       --output_dir "."
    #end if
    ## Every selected input whose extension is not the generic 'data' is passed as a path,name pair.
    #if len($input_tab) != 0:
        --input_formats "$input_formats"
        #for $intab in $input_tab:
            #if $intab.ext != 'data':
              --input_tab "${intab},${intab.name}"
            #end if
        #end for
    #end if
#end if
  </command>
  <inputs>
    <!-- Optional history datasets for the script; several may be selected at once. -->
    <param name="input_tab"
           type="data"
           multiple="true"
           optional="true"
           size="120"
           label="Select one or more input file(s) from your history"
           help="Use the multiple input widget (above/right of input box) for multiple inputs - your script MUST be ready to parse the command line right - see samples below"/>
    <!-- Datatypes the script accepts; the choices come from upload_file_formats of the datatypes registry. -->
    <param name="input_formats"
           type="select"
           multiple="true"
           label="Select the datatype(s) that your tool/script accepts as input"
           help="If your datatype is not listed here, it has to be added in galaxy's datatypes_conf.xml">
        <options from_parameter="tool.app.datatypes_registry.upload_file_formats">
            <column name="value" index="0"/>
        </options>
    </param>
    <!-- Name used for the new tool and its output labels; the sanitizer keeps only letters, digits and underscore. -->
    <param name="tool_name"
           type="text"
           value="My dynamic script"
           size="60"
           label="New tool ID and title for outputs"
           help="This will become the toolshed repository name so please choose thoughtfully to avoid namespace clashes with other tool writers">
        <sanitizer invalid_char="">
            <valid initial="string.letters,string.digits">
                <add value="_"/>
            </valid>
        </sanitizer>
    </param>
    <!-- Chooses between just running the script and also packaging it as a toolshed archive. -->
    <conditional name="makeMode">
        <param name="make_Tool"
               type="select"
               size="60"
               label="Generate new tool as  a tar.gz file ready to upload to a toolshed repository"
               help="Generate a toolshed archive - upload to a toolshed from where it can be auto-installed via the Galaxy admin functions">
            <option value="yes">Generate a Galaxy ToolShed compatible toolshed.gz</option>
            <option value="" selected="true">No. Just run the script please</option>
        </param>
        <!-- Settings that are only offered when a tool archive is being generated. -->
        <when value="yes">
            <param name="tool_version"
                   type="text"
                   value="0.01"
                   label="Tool Version - bump this to warn users trying to redo old analyses"
                   help="If you change your script and regenerate the 'same' tool, you should inform Galaxy (and users) by changing (bumping is traditional) this number"/>
            <param name="tool_desc"
                   type="text"
                   value=""
                   size="40"
                   label="Tool Description"
                   help="Supply a brief tool description for the Galaxy tool menu entry (optional - appears after the tool name)"/>
            <!-- Free text help; every printable character is accepted and no character mapping is applied. -->
            <param name="help_text"
                   type="text"
                   area="true"
                   size="8x120"
                   value="**What it Does**"
                   label="Tool form documentation and help text for users"
                   help="Supply the brief user documentation to appear on the new tool form as reStructured text - http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html">
                <sanitizer>
                    <valid initial="string.printable"/>
                    <mapping initial="none"/>
                </sanitizer>
            </param>
            <!-- Any number of citations, each entered either as a DOI or as a BibTeX record. -->
            <repeat name="citations" title="Citation">
                <conditional name="citation_type">
                    <param name="type" type="select" label="Citation Type">
                        <option value="doi">DOI</option>
                        <option value="bibtex">BibTeX</option>
                    </param>
                    <when value="doi">
                        <param name="doi"
                               type="text"
                               value=""
                               label="DOI"
                               help="Supply a DOI (e.g. doi: 10.1111/j.1740-9713.2007.00258.x) that should be cited when this tool is used in published research."/>
                    </when>
                    <when value="bibtex">
                        <param name="bibtex"
                               type="text"
                               area="true"
                               size="8x120"
                               value=""
                               label="BibTex"
                               help="Supply a BibTex entry that should be cited when this tool is used in published research.">
                            <sanitizer>
                                <valid initial="string.printable"/>
                                <mapping initial="none"/>
                            </sanitizer>
                        </param>
                    </when>
                </conditional>
            </repeat>
            <param name="include_deps"
                   type="select"
                   size="60"
                   label="Include ghostscript and graphicsmagick dependencies in generated tool"
                   help="If an HTML file is being created, including dependencies is recommended. Otherwise this setting has no effect">
                <option value="">Rely on system ghostscript and graphicsmagick rather than include these as dependencies</option>
                <option value="yes" selected="true">Include dependencies so target installations will always work if HTML is being generated</option>
            </param>
        </when>
        <!-- Nothing further to ask when the script is only being run. -->
        <when value=""/>
    </conditional>
    <!-- Whether to produce the HTML report output; off by default. -->
    <param name="make_HTML"
           type="select"
           size="60"
           label="Create an HTML report with links to every output file your script writes to the current directory"
           help="Recommended for presenting complex outputs in an accessible manner. Turn off for simple tools so they just create one output">
        <option value="yes">Yes, arrange and link all output files written by my script to the current directory as an HTML page</option>
        <option value="" selected="true">No, no HTML output file thanks</option>
    </param>
    <!-- Whether the script writes a new history output; on by default. -->
    <param name="make_TAB"
           type="select"
           label="Create a new (default tabular) history output with or without an HTML file specified above"
           help="This is useful if your script creates a single new tabular file you want to appear in the history after the tool executes">
        <option value="yes" selected="true">My script writes to a new history output</option>
        <option value="">I do not want a new history output file</option>
    </param>
    <!-- Datatype of the new history output; the choices come from upload_file_formats of the datatypes registry. -->
    <param name="output_format"
           type="select"
           multiple="false"
           label="Select the datatype that your tool/script emits as output"
           help="If your datatype is not listed here, it has to be added in galaxy's datatypes_conf.xml">
        <options from_parameter="tool.app.datatypes_registry.upload_file_formats">
            <column name="value" index="0"/>
        </options>
    </param>
    <!-- Interpreter choice. Rscript, python and perl offer a package list built by find_packages; bash and sh use the fixed value "system". -->
    <conditional name="interp">
        <param name="interpreter"
               type="select"
               label="Select the interpreter for your code. This must be available on the path of the execution host">
            <option value="Rscript" selected="true">Rscript</option>
            <option value="python">python</option>
            <option value="perl">perl</option>
            <option value="bash">bash</option>
            <option value="sh">sh</option>
        </param>
        <when value="Rscript">
            <param name="envpath"
                   type="select"
                   label="Interpreter to use"
                   dynamic_options="find_packages(prefix='package_r_')"
                   help="Select the R interpreter to use when running this code - should show all installed tool shed package_r_..."/>
        </when>
        <when value="python">
            <param name="envpath"
                   type="select"
                   label="Interpreter to use"
                   dynamic_options="find_packages(prefix='package_python_')"
                   help="Select the python dependency to use when running this code - should show all installed tool shed package_python_..."/>
        </when>
        <when value="perl">
            <param name="envpath"
                   type="select"
                   label="Interpreter to use"
                   dynamic_options="find_packages(prefix='package_perl_')"
                   help="Select the Perl interpreter to use when running this code - should show all installed tool shed package_perl_..."/>
        </when>
        <when value="bash">
            <param name="envpath" type="hidden" value="system"/>
        </when>
        <when value="sh">
            <param name="envpath" type="hidden" value="system"/>
        </when>
    </conditional>
    <!-- Whether the additional parameters become editable fields on the generated tool form; "no" is the default. -->
    <param name="edit_params"
           type="select"
           label="Add all additional parameters to the generated tool form so they are user editable?"
           help="If no (default), users will NOT be able to alter any additional parameters. If yes, these will appear on the tool form as text fields with no validation or sanitizing">
        <option value="yes">Yes, allow user to edit all additional parameters on the generated tool form</option>
        <option value="no" selected="true">No - use the fixed values for all additional parameters - no user editing</option>
    </param>
    <!-- Repeatable name/type/value/label/help records. The command template joins the five fields of each record with commas inside one double quoted option, which is why commas and double quotes are removed from the free text fields. -->
    <repeat name="additional_parameters" title="Pass additional parameters to the script"
         help="See examples below to see how these can be parsed by scripts in the various languages">
      <!-- Underscore is accepted (as it is for tool_name) so that the default value parameter_name survives its own sanitizer unchanged. -->
      <param name="param_name" type="text" value="parameter_name" label="Choose the name for this parameter" size="60">
        <sanitizer invalid_char="">
          <valid initial="string.letters,string.digits">
            <add value="_"/>
          </valid>
          <mapping initial="none"/>
        </sanitizer>
      </param>
      <param name="param_type" type="select" label="Select the type for this parameter">
        <option value="text" selected="true">text</option>
        <option value="integer">integer</option>
        <option value="float">float</option>
        <option value="file">file</option>
        <option value="library_data">library_data</option>
      </param>
      <param name="param_value" type="text" value="parameter_value" label="Enter this parameter's default value" size="60"
        help="Note that commas and double quotes cannot be used in this text field - please work around this technical limitation" >
        <sanitizer invalid_char="">
          <valid initial="string.printable"> <remove value=','/> <remove value='"'/> </valid>
          <mapping initial="none"/>
        </sanitizer>
      </param>
      <param name="param_label" type="text" value="parameter_label" label="Enter this parameter's label for the form" size="60"
         help="Note that commas and double quotes cannot be used in this text field - please creatively work around this technical limitation" >
        <sanitizer invalid_char="">
          <valid initial="string.printable"> <remove value=','/> <remove value='"'/> </valid>
          <mapping initial="none"/>
        </sanitizer>
      </param>
      <param name="param_help" type="text" value="parameter_help" label="Help for this parameter"
         help="Note that commas and double quotes cannot be used in this text field - please work around this technical limitation" size="60">
        <sanitizer invalid_char="">
          <valid initial="string.printable"> <remove value=','/> <remove value='"'/> </valid>
          <mapping initial="none"/>
        </sanitizer>
      </param>
    </repeat>
    <!-- The script text itself; every printable character is accepted and no character mapping is applied. -->
    <param name="dynScript"
           type="text"
           area="True"
           size="20x120"
           value=""
           label="Cut and paste the script to be executed here"
           help="Script must deal with three command line parameters: please cut and paste from examples below for the appropriate scripting language">
        <sanitizer>
            <valid initial="string.printable"/>
            <mapping initial="none"/>
        </sanitizer>
    </param>
  </inputs>
  <outputs>
    <!-- Script output dataset: present only when make_TAB is "yes"; its format is taken from the output_format parameter. -->
    <data name="output1" format="tabular" label="${tool_name}.${output_format}" metadata_source="input_tab">
      <filter>make_TAB=="yes"</filter>
      <actions>
        <action type="format">
          <option type="from_param" name="output_format"/>
        </action>
      </actions>
    </data>
    <!-- HTML report: present only when make_HTML is "yes". -->
    <data name="html_file" format="html" label="${tool_name}.html">
      <filter>make_HTML == "yes"</filter>
    </data>
    <!-- Generated tool archive: present only when make_Tool is "yes". -->
    <data name="new_tool" format="toolshed.gz" label="${tool_name}.toolshed.gz">
      <filter>makeMode['make_Tool'] == "yes"</filter>
    </data>
  </outputs>
<configfiles>
<!-- runme: the pasted script text; the command template passes this file's path as the script_path option. -->
<configfile name="runme">$dynScript</configfile>
<!-- helpme: the help text entered for the generated tool; nothing is emitted unless make_Tool is "yes". -->
<configfile name="helpme">
#if $makeMode.make_Tool == "yes":
${makeMode.help_text}
#end if
</configfile>
<!-- citeme: for each citation entered, a marker line (**ENTRY**bibtex or **ENTRY**doi) followed by the citation text; nothing is emitted unless make_Tool is "yes". -->
<configfile name="citeme">
#if $makeMode.make_Tool == "yes":
#for $citation in $makeMode.citations:
#if $citation.citation_type.type == "bibtex":
**ENTRY**bibtex
${citation.citation_type.bibtex}
#else
**ENTRY**doi
${citation.citation_type.doi}
#end if
#end for
#end if
</configfile>
</configfiles>
<tests>
<!-- Functional test: python interpreter, one tabular input, with the tabular output, the HTML report and the tool archive all requested. -->
<!-- NOTE(review): "runme" is the name of a configfile in this tool, not of an input; the script text input is "dynScript". Confirm that this param actually supplies the test script. -->
<test>
    <param name="input_tab" value="tf2_test_in.xls" ftype="tabular"/>
    <param name="tool_name" value="tf2_test"/>
    <param name="make_Tool" value="yes"/>
    <param name="tool_version" value="0.01"/>
    <param name="tool_desc" value="testing_tf2"/>
    <param name="help_text" value="help text goes here"/>
    <param name="include_deps" value="yes"/>
    <param name="make_HTML" value="yes"/>
    <param name="make_TAB" value="yes"/>
    <param name="output_format" value="tabular"/>
    <param name="input_formats" value="tabular"/>
    <param name="interpreter" value="python"/>
    <param name="envpath" value="system"/>
    <param name="runme" value="tf2_test_runme.py"/>
    <output name="output1" file="tf2_test_out.xls" compare="diff" lines_diff="10"/>
    <output name="html_file" file="tf2_test.html" compare="diff" lines_diff="10"/>
    <output name="new_tool" file="tf2_test.toolshed.gz" compare="sim_size" delta="600"/>
</test>
</tests>
<help>
    
.. class:: warningmark

**Details and attribution** 
(see GTF_)

**Local Admins ONLY** 
Only users whose email addresses are listed in the local admin_users configuration setting in universe_wsgi.ini can run this tool.

**If you find a bug** 
Please raise an issue at the bitbucket repository GTFI_

**What it does** 
This tool enables a user to paste and submit an arbitrary R/python/perl script to Galaxy passing
any number of input files and their metadata names to the script for use in (eg) summaries or reports.
This allows quick generation of tools capable of taking some arbitrary number of user selected inputs
for the "reduce" step of a map-reduce HPC model.

**Input options** 
This version allows multiple input files to be selected from the history - their paths and metadata names
are provided to your script - see the examples below for each scripting language. Python uses argparse.
Rscript uses some code to create all the command line variables. Parameters are injected into the bash/sh
execution environment so they magically "appear" as $ prefixed variables and will also be found on the
command line as parameters to be parsed if that's your preference.
Note that by default additional parameters are NOT presented to the user of the generated tool but are frozen with the script.
If you choose to make them user editable on this form, they will appear on the generated tool form as text fields with no validation or sanitizing.

**Output options** 
Optional script outputs include one single new history tabular file and for scripts that create multiple file reports
or analyses, an Html page linking every file and image created by the script can be automatically generated.

**Tool Generation option** 
Once the script is working with test data, this tool will optionally generate a new Galaxy tool in a Tool Shed (gzip) repository file
ready to upload to your local toolshed for sharing and installation.
Provide a small sample input when you generate the tool because it will become the input for the generated functional test.

.. class:: warningmark

**Note to system administrators** 
This tool offers *NO* built in protection against malicious scripts. It should only be installed on private/personal Galaxy instances.
Admin_users will have the power to do anything they want as the Galaxy user if you install this tool.

.. class:: warningmark

**Use on public servers**  is STRONGLY discouraged for obvious reasons

The tools generated by this tool will run just as securely as any other normal installed Galaxy tool but like any other new tools, should always be checked carefully before installation.
We recommend that you follow the good code hygiene practices associated with safe toolshed.

**Scripting conventions** The pasted script will be executed with the path to the (optional) input tabular data file path or NONE if you do not select one, and the path to the optional
output file or None if none is wanted, as the first and second command line parameters. The script must deal appropriately with these - see Rscript examples below.
Note that if an optional HTML output is selected, all the output files created by the script will be nicely presented as links, with pdf images linked as thumbnails in that output.
This can be handy for complex scripts creating lots of output.

<![CDATA[

**Multiple inputs**

Your script will receive up to 3 named parameters
INPATHS is a comma separated list of input file paths
INNAMES is a comma separated list of input file names in the same order
OUTPATH is optional - if an output file is being generated, your script should write it there
Your script should open and write files in the provided working directory if you are using the Html
automatic presentation option.

Python script command lines will have --INPATHS and --additional_arguments etc. to make it easy to use argparse

Rscript will need to use commandArgs(TRUE) - see the example below - additional arguments will
appear as themselves - eg foo="bar" will mean that foo is defined as "bar" for the script.

Bash and sh will see any additional parameters on their command lines and the 3 named parameters
in their environment magically - well, using env on the CL

***python***::

 # argparse for 3 possible comma separated lists
 # additional parameters need to be parsed !
 # then echo parameters to the output file
 import sys
 import os
 import argparse

 argp=argparse.ArgumentParser()
 argp.add_argument('--innames',default=None)
 argp.add_argument('--inpaths',default=None)
 argp.add_argument('--outpath',default=None)
 argp.add_argument('--additional_parameters',default=[],action="append")
 argp.add_argument('otherargs', nargs=argparse.REMAINDER)
 args = argp.parse_args()
 opath = args.outpath
 odir = os.path.split(opath)[0]
 try:
    os.makedirs(odir)
    print '### made',odir
 except:
    pass
 f= open(opath,'w')
 s = '### args=%s\n' % str(args)
 f.write(s)
 s = 'sys.argv=%s\n' % sys.argv
 f.write(s)
 f.close()


***Rscript***::

 # tool factory Rscript parser suggested by Forester
 # http://www.r-bloggers.com/including-arguments-in-r-cmd-batch-mode/
 # additional parameters will appear in the ls() below - they are available
 # to your script
 # echo parameters to the output file
 ourargs = commandArgs(TRUE)
 if(length(ourargs)==0){
    print("No arguments supplied.")
 }else{
    for(i in 1:length(ourargs)){
         eval(parse(text=ourargs[[i]]))
    }
 sink(OUTPATH)
 cat('INPATHS=',INPATHS,'\n')
 cat('INNAMES=',INNAMES,'\n')
 cat('OUTPATH=',OUTPATH,'\n')
 x=ls()
 cat('all objects=',x,'\n')
 sink()
 }
 sessionInfo()
 print.noquote(date())


***bash/sh***::

 # tool factory sets up these environmental variables
 # this example writes those to the output file
 # additional params appear on command line
 if [ ! -f "$OUTPATH" ] ; then
    touch "$OUTPATH"
 fi
 echo "INPATHS=$INPATHS" >> "$OUTPATH"
 echo "INNAMES=$INNAMES" >> "$OUTPATH"
 echo "OUTPATH=$OUTPATH" >> "$OUTPATH"
 echo "CL=$@" >> "$OUTPATH"

***perl***::

 (my $INPATHS,my $INNAMES,my $OUTPATH ) = @ARGV;
 open(my $fh, '>', $OUTPATH) or die "Could not open file '$OUTPATH' $!";
 print $fh "INPATHS=$INPATHS\n INNAMES=$INNAMES\n OUTPATH=$OUTPATH\n";
 close $fh;
 

]]>



Paper_ :

Creating re-usable tools from scripts: The Galaxy Tool Factory
Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team
Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573

**Licensing** 

Copyright Ross Lazarus (ross period lazarus at gmail period com) May 2012
All rights reserved.
Licensed under the LGPL_

.. _LGPL: http://www.gnu.org/copyleft/lesser.html
.. _GTF:  https://bitbucket.org/fubar/galaxytoolfactory
.. _GTFI:  https://bitbucket.org/fubar/galaxytoolfactory/issues
.. _Paper: http://bioinformatics.oxfordjournals.org/cgi/reprint/bts573


</help>
<!-- Citation for this tool itself: the DOI of the Galaxy Tool Factory paper named in the help text. -->
<citations>
    <citation type="doi">10.1093/bioinformatics/bts573</citation>
</citations>
</tool>