# HG changeset patch # User fubar # Date 1425291501 18000 # Node ID 9fe74bd23af22867a834d39bd41fd52daca3a933 # Parent 1a4d3923aa9fb1c8c5039ec4427a85255f730e79 Uploaded diff -r 1a4d3923aa9f -r 9fe74bd23af2 .shed.yml --- a/.shed.yml Mon Mar 02 05:18:05 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -# repository published to https://toolshed.g2.bx.psu.edu/repos/fubar/tool_factory_2 -owner: fubar -name: tool_factory_2 diff -r 1a4d3923aa9f -r 9fe74bd23af2 LICENSE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE Mon Mar 02 05:18:21 2015 -0500 @@ -0,0 +1,504 @@ +GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +(This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.) + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. 
Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. 
Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. 
+ + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. 
(Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. 
+ + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. 
+ +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". 
Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. 
+ + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. 
+ + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. 
You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. 
For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. 
+Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + {description} + Copyright (C) {year} {fullname} + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 + USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random + Hacker. + + {signature of Ty Coon}, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff -r 1a4d3923aa9f -r 9fe74bd23af2 README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Mon Mar 02 05:18:21 2015 -0500 @@ -0,0 +1,424 @@ +toolfactory_2 +============= + +This is an upgrade to the tool factory but with added parameters +(optionally editable in the generated tool form - otherwise fixed) and +multiple input files. + +Specify any number of parameters - well at +least up to the limit of your patience with repeat groups. + +Parameter values supplied at tool generation time are defaults and +can be optionally editable by the user - names cannot be changed once +a tool has been generated. + +If not editable, they act as hidden parameters passed to the script +and are not editable on the tool form. + +Note! There will be Galaxy default sanitization for all +user input parameters which your script may need to dance around. + +Any number of input files can be passed to your script, but of course it +has to deal with them. Both path and metadata name are supplied either in the environment +(bash/sh) or as command line parameters (python,perl,rscript) that need to be parsed and +dealt with in the script. This is complicated by the common use case of needing file names +for (eg) column headers, as well as paths. 
Try the examples shown on the tool factory +form to see how Galaxy file and user supplied parameter values can be recovered in each +of the 4 scripting environments supported. + +Best way to deal with multiple outputs is to let the tool factory generate an HTML +page for your users. It automagically lays out pdf images as thumbnail galleries +and can have separate results sections gathering all similarly prefixed files, such as +a Foo section taking text and results from text (foo_whatever.log) and +artifacts (eg foo_MDS_plot.pdf) file names. All artifacts are linked for download. +A copy of the actual script is provided for provenance - be warned, it exposes +real file paths. + + +tldr; + +``` + +# WARNING before you start +# Install this tool on a private Galaxy ONLY +# Please NEVER on a public or production instance +# updated august 2014 by John Chilton adding citation support +# +# updated august 8 2014 to fix bugs reported by Marius van den Beek +# please cite the resource at +http://bioinformatics.oxfordjournals.org/cgi/reprint/bts573?ijkey=lczQh1sWrMwdYWJ&keytype=ref +# if you use this tool in your published work. + +*Short Story* + +This is an unusual Galaxy tool capable of generating new Galaxy tools. +It works by exposing *unrestricted* and therefore extremely dangerous scripting +to all designated administrators of the host Galaxy server, allowing them to +run scripts in R, python, sh and perl over multiple selected input data sets, +writing a single new data set as output. + +*Differences between TF2 and the original Tool Factory* + +1. TF2 (this one) allows any number of either fixed or user-editable parameters to be defined +for the new tool. If these are editable, the user can change them but otherwise, they are passed +as fixed and invisible parameters for each execution.
Obviously, there are substantial security +implications with editable parameters, but these are always sanitized by Galaxy's inbuilt +parameter sanitization so you may need to "unsanitize" characters - eg translate all "__lt__" +into "<" for certain parameters where that is needed. Please practise safe toolshed. + +2. Any number of input files (of the same datatype) may be defined. + +These changes substantially complicate the way your supplied script is supplied with +all the new and variable parameters. Examples in each scripting language are shown +in the tool help. + +*Automated outputs in named sections* + +If your script writes to the current directory path, an arbitrary mix of (eg) +pdfs, tabular analysis results and run logs, the tool factory can optionally +auto-generate a linked Html page with separate sections showing a thumbnail +grid for all pdfs and the log text, grouping all artifacts sharing a file +name and log name prefix:: + + eg: if "foo.log" is emitted then *all* other outputs matching foo_* will + all be grouped together - eg + foo_baz.pdf + foo_bar.pdf and + foo_zot.xls + would all be displayed and linked in the same section with foo.log's contents + - to form the "Foo" section of the Html page. Sections appear in alphabetic + order and there are no limits on the number of files or sections. + +*Automated generation of new Galaxy tools for installation into any Galaxy* + +Once a script is working correctly, this tool optionally generates a +new Galaxy tool, effectively freezing the supplied script into a new, +ordinary Galaxy tool that runs it over one or more input files selected by +the user. Generated tools are installed via a tool shed by an administrator +and work exactly like all other Galaxy tools for your users.
+ +If you use the Html output option, please ensure that sanitize_all_html is +set to False and uncommented in universe_wsgi.ini - it should show:: + + # By default, all tool output served as 'text/html' will be sanitized + sanitize_all_html = False + +This opens potential security risks and may not be acceptable for public +sites where the lack of stylesheets may make Html pages damage onlookers' +eyeballs but should still be correct. + + +*More Detail* + +To use the ToolFactory, you should have prepared a script to paste into a +text box, and a small test input example ready to select from your history +to test your new script. + +There is an example in each scripting language on the Tool Factory form. You +can just cut and paste these to try it out - remember to select the right +interpreter please. You'll also need to create a small test data set using +the Galaxy history add new data tool. + +If the script fails somehow, use the "redo" button on the tool output in +your history to recreate the form complete with broken script. Fix the bug +and execute again. Rinse, wash, repeat. + +Once the script runs successfully, a new Galaxy tool that runs your script +can be generated. Select the "generate" option and supply some help text and +names. The new tool will be generated in the form of a new Galaxy datatype +- toolshed.gz - as the name suggests, it's an archive ready to upload to a +Galaxy ToolShed as a new tool repository. + +Once it's in a ToolShed, it can be installed into any local Galaxy server +from the server administrative interface. + +Once the new tool is installed, local users can run it - each time, the script +that was supplied when it was built will be executed with the input chosen +from the user's history. In other words, the tools you generate with the +ToolFactory run just like any other Galaxy tool, but run your script every time. + +Tool factory tools are perfect for workflow components. One input, one output, +no variables.
+ +*To fully and safely exploit the awesome power* of this tool, +Galaxy and the ToolShed, you should be a developer installing this +tool on a private/personal/scratch local instance where you are an +admin_user. Then, if you break it, you get to keep all the pieces see +https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home + +** Installation ** +This is a Galaxy tool. You can install it most conveniently using the +administrative "Search and browse tool sheds" link. Find the Galaxy Main +toolshed at https://toolshed.g2.bx.psu.edu/ and search for the toolfactory +repository. Open it and review the code and select the option to install it. + +( +If you can't get the tool that way, the xml and py files here need to be +copied into a new tools +subdirectory such as tools/toolfactory Your tool_conf.xml needs a new entry +pointing to the xml +file - something like:: + +
+ +
+ +If not already there (I just added it to datatypes_conf.xml.sample), +please add: + +to your local datatypes_conf.xml. +) + +Of course, R, python, perl etc are needed on your path if you want to test +scripts using those interpreters. Adding new ones to this tool code should +be easy enough. Please make suggestions as bitbucket issues and code. The +HTML file code automatically shrinks R's bloated pdfs, and depends on +ghostscript. The thumbnails require imagemagick. + +* Restricted execution * +The tool factory tool itself will then be usable ONLY by admin users - +people with IDs in admin_users in universe_wsgi.ini **Yes, that's right. ONLY +admin_users can run this tool** Think about it for a moment. If allowed to +run any arbitrary script on your Galaxy server, the only thing that would +impede a miscreant bent on destroying all your Galaxy data would probably +be lack of appropriate technical skills. + +*What it does* This is a tool factory for simple scripts in python, R and +perl currently. Functional tests are automatically generated. How cool is that. + +LIMITED to simple scripts that read one input from the history. Optionally can +write one new history dataset, and optionally collect any number of outputs +into links on an autogenerated HTML index page for the user to navigate - +useful if the script writes images and output files - pdf outputs are shown +as thumbnails and R's bloated pdf's are shrunk with ghostscript so that and +imagemagick need to be available. + +Generated tools can be edited and enhanced like any Galaxy tool, so start +small and build up since a generated script gets you a serious leg up to a +more complex one. + +*What you do* You paste and run your script, you fix the syntax errors and +eventually it runs. You can use the redo button and edit the script before +trying to rerun it as you debug - it works pretty well. 
+ +Once the script works on some test data, you can generate a toolshed compatible +gzip file containing your script ready to run as an ordinary Galaxy tool in +a repository on your local toolshed. That means safe and largely automated +installation in any production Galaxy configured to use your toolshed. + +*Generated tool Security* Once you install a generated tool, it's just +another tool - assuming the script is safe. They just run normally and their +user cannot do anything unusually insecure but please, practice safe toolshed. +Read the fucking code before you install any tool. Especially this one - +it is really scary. + +If you opt for an HTML output, you get all the script outputs arranged +as a single Html history item - all output files are linked, thumbnails for +all the pdfs. Ugly but really inexpensive. + +Patches and suggestions welcome as bitbucket issues please? + +copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012 + +all rights reserved +Licensed under the LGPL if you want to improve it, feel free +https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home + +Material for our more enthusiastic and voracious readers continues below - +we salute you. + +**Motivation** Simple transformation, filtering or reporting scripts get +written, run and lost every day in most busy labs - even ours where Galaxy is +in use. This 'dark script matter' is pervasive and generally not reproducible. + +**Benefits** For our group, this allows Galaxy to fill that important dark +script gap - all those "small" bioinformatics tasks. Once a user has a working +R (or python or perl) script that does something Galaxy cannot currently do +(eg transpose a tabular file) and takes parameters the way Galaxy supplies +them (see example below), they: + +1. Install the tool factory on a personal private instance + +2. Upload a small test data set + +3. 
Paste the script into the 'script' text box and iteratively run the +insecure tool on test data until it works right - there is absolutely no +reason to do this anywhere other than on a personal private instance. + +4. Once it works right, set the 'Generate toolshed gzip' option and run +it again. + +5. A toolshed style gzip appears ready to upload and install like any other +Toolshed entry. + +6. Upload the new tool to the toolshed + +7. Ask the local admin to check the new tool to confirm it's not evil and +install it in the local production galaxy + + + +**Parameter passing and file inputs** + +Your script will receive up to 3 named parameters +INPATHS is a comma separated list of input file paths +INNAMES is a comma separated list of input file names in the same order +OUTPATH is optional if a file is being generated, your script should write there +Your script should open and write files in the provided working directory if you are using the Html +automatic presentation option. + +Python script command lines will have --INPATHS and --additional_arguments etc. to make it easy to use argparse + +Rscript will need to use commandArgs(TRUE) - see the example below - additional arguments will +appear as themselves - eg foo="bar" will mean that foo is defined as "bar" for the script. + +Bash and sh will see any additional parameters on their command lines and the 3 named parameters +in their environment magically - well, using env on the CL + +***python***:: + + # argparse for 3 possible comma separated lists + # additional parameters need to be parsed ! 
+ # then echo parameters to the output file + import sys + import argparse + argp=argparse.ArgumentParser() + argp.add_argument('--INNAMES',default=None) + argp.add_argument('--INPATHS',default=None) + argp.add_argument('--OUTPATH',default=None) + argp.add_argument('--additional_parameters',default=[],action="append") + argp.add_argument('otherargs', nargs=argparse.REMAINDER) + args = argp.parse_args() + f= open(args.OUTPATH,'w') + s = '### args=%s\n' % str(args) + f.write(s) + s = 'sys.argv=%s\n' % sys.argv + f.write(s) + f.close() + + + +***Rscript***:: + + # tool factory Rscript parser suggested by Forester + # http://www.r-bloggers.com/including-arguments-in-r-cmd-batch-mode/ + # additional parameters will appear in the ls() below - they are available + # to your script + # echo parameters to the output file + ourargs = commandArgs(TRUE) + if(length(ourargs)==0){ + print("No arguments supplied.") + }else{ + for(i in 1:length(ourargs)){ + eval(parse(text=ourargs[[i]])) + } + sink(OUTPATH) + cat('INPATHS=',INPATHS,'\n') + cat('INNAMES=',INNAMES,'\n') + cat('OUTPATH=',OUTPATH,'\n') + x=ls() + cat('all objects=',x,'\n') + sink() + } + sessionInfo() + print.noquote(date()) + + +***bash/sh***:: + + # tool factory sets up these environmental variables + # this example writes those to the output file + # additional params appear on command line + if [ ! -f "$OUTPATH" ] ; then + touch "$OUTPATH" + fi + echo "INPATHS=$INPATHS" >> "$OUTPATH" + echo "INNAMES=$INNAMES" >> "$OUTPATH" + echo "OUTPATH=$OUTPATH" >> "$OUTPATH" + echo "CL=$@" >> "$OUTPATH" + +***perl***:: + + (my $INPATHS,my $INNAMES,my $OUTPATH ) = @ARGV; + open(my $fh, '>', $OUTPATH) or die "Could not open file '$OUTPATH' $!"; + print $fh "INPATHS=$INPATHS\n INNAMES=$INNAMES\n OUTPATH=$OUTPATH\n"; + close $fh; + + + +Galaxy as an IDE for developing API scripts +If you need to develop Galaxy API scripts and you like to live dangerously, +please read on. + +Galaxy as an IDE? 
+Amazingly enough, blend-lib API scripts run perfectly well *inside* +Galaxy when pasted into a Tool Factory form. No need to generate a new +tool. Galaxy+Tool_Factory = IDE I think we need a new t-shirt. Seriously, +it is actually quite useable. + +Why bother - what's wrong with Eclipse +Nothing. But, compared with developing API scripts in the usual way outside +Galaxy, you get persistence and other framework benefits plus at absolutely +no extra charge, a ginormous security problem if you share the history or +any outputs because they contain the api script with key so development +servers only please! + +Workflow +Fire up the Tool Factory in Galaxy. + +Leave the input box empty, set the interpreter to python, paste and run an +api script - eg working example (substitute the url and key) below. + +It took me a few iterations to develop the example below because I know +almost nothing about the API. I started with very simple code from one of the +samples and after each run, the (edited..) api script is conveniently recreated +using the redo button on the history output item. So each successive version +of the developing api script you run is persisted - ready to be edited and +rerun easily. It is ''very'' handy to be able to add a line of code to the +script and run it, then view the output to (eg) inspect dicts returned by +API calls to help move progressively deeper iteratively. + +Give the below a whirl on a private clone (install the tool factory from +the main toolshed) and try adding complexity with few rerun/edit/rerun cycles. + +Eg tool factory api script +import sys +from blend.galaxy import GalaxyInstance +ourGal = 'http://x.x.x.x:xxxx' +ourKey = 'xxx' +gi = GalaxyInstance(ourGal, key=ourKey) +libs = gi.libraries.get_libraries() +res = [] +# libs looks like +# u'url': u'/galaxy/api/libraries/441d8112651dc2f3', u'id': +u'441d8112651dc2f3', u'name':.... 
u'Demonstration sample RNA data', +for lib in libs: + res.append('%s:\n' % lib['name']) + res.append(str(gi.libraries.show_library(lib['id'],contents=True))) +outf=open(sys.argv[2],'w') +outf.write('\n'.join(res)) +outf.close() + +**Attribution** +Creating re-usable tools from scripts: The Galaxy Tool Factory +Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team +Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573 + +http://bioinformatics.oxfordjournals.org/cgi/reprint/bts573?ijkey=lczQh1sWrMwdYWJ&keytype=ref + +**Licensing** +Copyright Ross Lazarus 2010 +ross lazarus at g mail period com + +All rights reserved. + +Licensed under the LGPL + +**Obligatory screenshot** + +http://bitbucket.org/fubar/galaxytoolmaker/src/fda8032fe989/images/dynamicScriptTool.png + + +``` + diff -r 1a4d3923aa9f -r 9fe74bd23af2 README.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.txt Mon Mar 02 05:18:21 2015 -0500 @@ -0,0 +1,361 @@ +# WARNING before you start +# Install this tool on a private Galaxy ONLY +# Please NEVER on a public or production instance +# updated august 2014 by John Chilton adding citation support +# +# updated august 8 2014 to fix bugs reported by Marius van den Beek +# please cite the resource at +http://bioinformatics.oxfordjournals.org/cgi/reprint/bts573?ijkey=lczQh1sWrMwdYWJ&keytype=ref +# if you use this tool in your published work. + +*Short Story* + +This is an unusual Galaxy tool capable of generating new Galaxy tools. +It works by exposing *unrestricted* and therefore extremely dangerous scripting +to all designated administrators of the host Galaxy server, allowing them to +run scripts in R, python, sh and perl over multiple selected input data sets, +writing a single new data set as output. + +*Differences between TF2 and the original Tool Factory* + +1. TF2 (this one) allows any number of either fixed or user-editable parameters to be defined +for the new tool. 
If these are editable, the user can change them but otherwise, they are passed +as fixed and invisible parameters for each execution. Obviously, there are substantial security +implications with editable parameters, but these are always sanitized by Galaxy's inbuilt +parameter sanitization so you may need to "unsanitize" characters - eg translate all "__lt__" +into "<" for certain parameters where that is needed. Please practise safe toolshed. + +2. Any number of (the same datatype) of input files may be defined. + +These changes substantially complicate the way your supplied script is supplied with +all the new and variable parameters. Examples in each scripting language are shown +in the tool help + +*Automated outputs in named sections* + +If your script writes to the current directory path, arbitrary mix of (eg) +pdfs, tabular analysis results and run logs,the tool factory can optionally +auto-generate a linked Html page with separate sections showing a thumbnail +grid for all pdfs and the log text, grouping all artifacts sharing a file +name and log name prefix:: + + eg: if "foo.log" is emitted then *all* other outputs matching foo_* will + all be grouped together - eg + foo_baz.pdf + foo_bar.pdf and + foo_zot.xls + would all be displayed and linked in the same section with foo.log's contents + - to form the "Foo" section of the Html page. Sections appear in alphabetic + order and there are no limits on the number of files or sections. + +*Automated generation of new Galaxy tools for installation into any Galaxy* + +Once a script is working correctly, this tool optionally generates a +new Galaxy tool, effectively freezing the supplied script into a new, +ordinary Galaxy tool that runs it over one or more input files selected by +the user. Generated tools are installed via a tool shed by an administrator +and work exactly like all other Galaxy tools for your users. 
+ +If you use the Html output option, please ensure that sanitize_all_html is +set to False and uncommented in universe_wsgi.ini - it should show:: + + # By default, all tool output served as 'text/html' will be sanitized + sanitize_all_html = False + +This opens potential security risks and may not be acceptable for public +sites where the lack of stylesheets may make Html pages damage onlookers' +eyeballs but should still be correct. + + +*More Detail* + +To use the ToolFactory, you should have prepared a script to paste into a +text box, and a small test input example ready to select from your history +to test your new script. + +There is an example in each scripting language on the Tool Factory form. You +can just cut and paste these to try it out - remember to select the right +interpreter please. You'll also need to create a small test data set using +the Galaxy history add new data tool. + +If the script fails somehow, use the "redo" button on the tool output in +your history to recreate the form complete with broken script. Fix the bug +and execute again. Rinse, wash, repeat. + +Once the script runs successfully, a new Galaxy tool that runs your script +can be generated. Select the "generate" option and supply some help text and +names. The new tool will be generated in the form of a new Galaxy datatype +- toolshed.gz - as the name suggests, it's an archive ready to upload to a +Galaxy ToolShed as a new tool repository. + +Once it's in a ToolShed, it can be installed into any local Galaxy server +from the server administrative interface. + +Once the new tool is installed, local users can run it - each time, the script +that was supplied when it was built will be executed with the input chosen +from the user's history. In other words, the tools you generate with the +ToolFactory run just like any other Galaxy tool, but run your script every time. + +Tool factory tools are perfect for workflow components. One input, one output, +no variables. 
+ +*To fully and safely exploit the awesome power* of this tool, +Galaxy and the ToolShed, you should be a developer installing this +tool on a private/personal/scratch local instance where you are an +admin_user. Then, if you break it, you get to keep all the pieces see +https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home + +** Installation ** +This is a Galaxy tool. You can install it most conveniently using the +administrative "Search and browse tool sheds" link. Find the Galaxy Main +toolshed at https://toolshed.g2.bx.psu.edu/ and search for the toolfactory +repository. Open it and review the code and select the option to install it. + +( +If you can't get the tool that way, the xml and py files here need to be +copied into a new tools +subdirectory such as tools/toolfactory Your tool_conf.xml needs a new entry +pointing to the xml +file - something like:: + +
+ +
+ +If not already there (I just added it to datatypes_conf.xml.sample), +please add: + +to your local datatypes_conf.xml. +) + +Of course, R, python, perl etc are needed on your path if you want to test +scripts using those interpreters. Adding new ones to this tool code should +be easy enough. Please make suggestions as bitbucket issues and code. The +HTML file code automatically shrinks R's bloated pdfs, and depends on +ghostscript. The thumbnails require imagemagick. + +* Restricted execution * +The tool factory tool itself will then be usable ONLY by admin users - +people with IDs in admin_users in universe_wsgi.ini **Yes, that's right. ONLY +admin_users can run this tool** Think about it for a moment. If allowed to +run any arbitrary script on your Galaxy server, the only thing that would +impede a miscreant bent on destroying all your Galaxy data would probably +be lack of appropriate technical skills. + +*What it does* This is a tool factory for simple scripts in python, R and +perl currently. Functional tests are automatically generated. How cool is that. + +LIMITED to simple scripts that read one input from the history. Optionally can +write one new history dataset, and optionally collect any number of outputs +into links on an autogenerated HTML index page for the user to navigate - +useful if the script writes images and output files - pdf outputs are shown +as thumbnails and R's bloated pdf's are shrunk with ghostscript so that and +imagemagick need to be available. + +Generated tools can be edited and enhanced like any Galaxy tool, so start +small and build up since a generated script gets you a serious leg up to a +more complex one. + +*What you do* You paste and run your script, you fix the syntax errors and +eventually it runs. You can use the redo button and edit the script before +trying to rerun it as you debug - it works pretty well. 
+ +Once the script works on some test data, you can generate a toolshed compatible +gzip file containing your script ready to run as an ordinary Galaxy tool in +a repository on your local toolshed. That means safe and largely automated +installation in any production Galaxy configured to use your toolshed. + +*Generated tool Security* Once you install a generated tool, it's just +another tool - assuming the script is safe. They just run normally and their +user cannot do anything unusually insecure but please, practice safe toolshed. +Read the fucking code before you install any tool. Especially this one - +it is really scary. + +If you opt for an HTML output, you get all the script outputs arranged +as a single Html history item - all output files are linked, thumbnails for +all the pdfs. Ugly but really inexpensive. + +Patches and suggestions welcome as bitbucket issues please? + +copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012 + +all rights reserved +Licensed under the LGPL if you want to improve it, feel free +https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home + +Material for our more enthusiastic and voracious readers continues below - +we salute you. + +**Motivation** Simple transformation, filtering or reporting scripts get +written, run and lost every day in most busy labs - even ours where Galaxy is +in use. This 'dark script matter' is pervasive and generally not reproducible. + +**Benefits** For our group, this allows Galaxy to fill that important dark +script gap - all those "small" bioinformatics tasks. Once a user has a working +R (or python or perl) script that does something Galaxy cannot currently do +(eg transpose a tabular file) and takes parameters the way Galaxy supplies +them (see example below), they: + +1. Install the tool factory on a personal private instance + +2. Upload a small test data set + +3. 
Paste the script into the 'script' text box and iteratively run the +insecure tool on test data until it works right - there is absolutely no +reason to do this anywhere other than on a personal private instance. + +4. Once it works right, set the 'Generate toolshed gzip' option and run +it again. + +5. A toolshed style gzip appears ready to upload and install like any other +Toolshed entry. + +6. Upload the new tool to the toolshed + +7. Ask the local admin to check the new tool to confirm it's not evil and +install it in the local production galaxy + +**Simple examples on the tool form** + +A simple Rscript "filter" showing how the command line parameters can be +handled, takes an input file, does something (transpose in this case) and +writes the results to a new tabular file:: + + # transpose a tabular input file and write as a tabular output file + ourargs = commandArgs(TRUE) + inf = ourargs[1] + outf = ourargs[2] + inp = read.table(inf,head=F,row.names=NULL,sep='\t') + outp = t(inp) + write.table(outp,outf, quote=FALSE, sep="\t",row.names=F,col.names=F) + +Calculate a multiple test adjusted p value from a column of p values - +for this script to be useful, it needs the right column for the input to be +specified in the code for the given input file type(s) specified when the +tool is generated :: + + # use p.adjust - assumes a HEADER row and column 1 - please fix for any + real use + column = 1 # adjust if necessary for some other kind of input + fdrmeth = 'BH' + ourargs = commandArgs(TRUE) + inf = ourargs[1] + outf = ourargs[2] + inp = read.table(inf,head=T,row.names=NULL,sep='\t') + p = inp[,column] + q = p.adjust(p,method=fdrmeth) + newval = paste(fdrmeth,'p-value',sep='_') + q = data.frame(q) + names(q) = newval + outp = cbind(inp,newval=q) + write.table(outp,outf, quote=FALSE, sep="\t",row.names=F,col.names=T) + + + +Another Rscript example without any input file - generates a random heatmap +pdf - you must make sure the option to create an HTML output 
file is +turned on for this to work. The heatmap will be presented as a thumbnail +linked to the pdf in the resulting HTML page:: + + # note this script takes NO input or output because it generates random data + foo = data.frame(a=runif(100),b=runif(100),c=runif(100),d=runif(100), + e=runif(100),f=runif(100)) + bar = as.matrix(foo) + pdf( "heattest.pdf" ) + heatmap(bar,main='Random Heatmap') + dev.off() + +A Python example that reverses each row of a tabular file. You'll need +to remove the leading spaces for this to work if cut and pasted into the +script box. Note that you can already do this in Galaxy by setting up the +cut columns tool with the correct number of columns in reverse order,but +this script will work for any number of columns so is completely generic:: + +# reverse order of columns in a tabular file +import sys +inp = sys.argv[1] +outp = sys.argv[2] +i = open(inp,'r') +o = open(outp,'w') +for row in i: + rs = row.rstrip().split('\t') + rs.reverse() + o.write('\t'.join(rs)) + o.write('\n') +i.close() +o.close() + + +Galaxy as an IDE for developing API scripts +If you need to develop Galaxy API scripts and you like to live dangerously, +please read on. + +Galaxy as an IDE? +Amazingly enough, blend-lib API scripts run perfectly well *inside* +Galaxy when pasted into a Tool Factory form. No need to generate a new +tool. Galaxy+Tool_Factory = IDE I think we need a new t-shirt. Seriously, +it is actually quite useable. + +Why bother - what's wrong with Eclipse +Nothing. But, compared with developing API scripts in the usual way outside +Galaxy, you get persistence and other framework benefits plus at absolutely +no extra charge, a ginormous security problem if you share the history or +any outputs because they contain the api script with key so development +servers only please! + +Workflow +Fire up the Tool Factory in Galaxy. 
+ +Leave the input box empty, set the interpreter to python, paste and run an +api script - eg working example (substitute the url and key) below. + +It took me a few iterations to develop the example below because I know +almost nothing about the API. I started with very simple code from one of the +samples and after each run, the (edited..) api script is conveniently recreated +using the redo button on the history output item. So each successive version +of the developing api script you run is persisted - ready to be edited and +rerun easily. It is ''very'' handy to be able to add a line of code to the +script and run it, then view the output to (eg) inspect dicts returned by +API calls to help move progressively deeper iteratively. + +Give the below a whirl on a private clone (install the tool factory from +the main toolshed) and try adding complexity with few rerun/edit/rerun cycles. + +Eg tool factory api script +import sys +from blend.galaxy import GalaxyInstance +ourGal = 'http://x.x.x.x:xxxx' +ourKey = 'xxx' +gi = GalaxyInstance(ourGal, key=ourKey) +libs = gi.libraries.get_libraries() +res = [] +# libs looks like +# u'url': u'/galaxy/api/libraries/441d8112651dc2f3', u'id': +u'441d8112651dc2f3', u'name':.... u'Demonstration sample RNA data', +for lib in libs: + res.append('%s:\n' % lib['name']) + res.append(str(gi.libraries.show_library(lib['id'],contents=True))) +outf=open(sys.argv[2],'w') +outf.write('\n'.join(res)) +outf.close() + +**Attribution** +Creating re-usable tools from scripts: The Galaxy Tool Factory +Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team +Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573 + +http://bioinformatics.oxfordjournals.org/cgi/reprint/bts573?ijkey=lczQh1sWrMwdYWJ&keytype=ref + +**Licensing** +Copyright Ross Lazarus 2010 +ross lazarus at g mail period com + +All rights reserved. 
+ +Licensed under the LGPL + +**Obligatory screenshot** + +http://bitbucket.org/fubar/galaxytoolmaker/src/fda8032fe989/images/dynamicScriptTool.png + diff -r 1a4d3923aa9f -r 9fe74bd23af2 getlocalrpackages.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/getlocalrpackages.py Mon Mar 02 05:18:21 2015 -0500 @@ -0,0 +1,53 @@ +import os +import subprocess +import sys + + +def find_packages(prefix="package_r_"): + """Return [user_sees, value, selected] triplets for every env.sh under the current directory whose path matches prefix (case-insensitive), preceded by the default 'system' entry. + """ + #locate env.sh | grep -i package_r_ + #/data/extended/galaxyJune14_2014/tool_dependency/readline/6.2/devteam/package_r_2_15_0/8ab0d08a3da1/env.sh + #/data/home/rlazarus/galaxy/tool_dependency_dir/R_3_1_1/3.1.1/fubar/package_r_3_1_1/5f1b8d22140a/env.sh + #/data/home/rlazarus/galaxy/tool_dependency_dir/R_3_1_1/3.1.1/fubar/package_r_3_1_1/d9964efbfbe3/env.sh + #/data/home/rlazarus/galtest/tool_dependency_dir/R_3_1_1/3.1.1/fubar/package_r_3_1_1/63cdb9b2234c/env.sh + eprefix = prefix + if '/' in prefix: + eprefix = prefix.replace('/','\/') # for grep + path = '.' + # fails on nitesh's recent mac - locate not working + # cl = ['locate env.sh | grep -i %s' % eprefix,] + cl = ['find %s -iname "env.sh" | grep -i %s' % (path,eprefix),] + p = subprocess.Popen(cl, stdout=subprocess.PIPE, stderr=subprocess.PIPE,shell=True,universal_newlines=True) # text (not bytes) output so the split below works on Python 2 and 3 + out, err = p.communicate() + fpaths = out.split('\n') + fpaths = [x for x in fpaths if len(x) > 1] + fver = [x.split(os.path.sep)[-4:-1] for x in fpaths] + # >>> foo.split(os.path.sep)[-4:-1] + # ['fubar', 'package_r_3_1_1', '63cdb9b2234c'] + if len(fpaths) > 0: + res = [['%s rev %s owner %s' % (x[1],x[2],x[0]),fpaths[i],False] for i,x in enumerate(fver)] + res.insert(0,['Use default (system) interpreter','system',False]) + else: + res = [['Use default (system) interpreter','system',False], + ['**WARNING** NO package env.sh files found - is the "find" system command working? 
Are any interpreters installed?','system',True]] + if len(res) > 2: + res[1][2] = True # selected if more than one + # return a triplet - user_sees,value,selected - all unselected if False + return res + +def testapi(): + host_url = 'http://localhost:8080' + new_path = [ os.path.join( os.getcwd(), "lib" ) ] + new_path.extend( sys.path[1:] ) # remove scripts/ from the path + sys.path = new_path + from galaxy import config + aconfig = config.Configuration( ) + M_A_K = aconfig.master_api_key + tooldeps = aconfig.tool_dependency_dir + gi = GalaxyInstance(url=host_url, key=M_A_K) # NOTE(review): GalaxyInstance is never imported in this file, so calling testapi() raises NameError - presumably the blend/bioblend GalaxyInstance was intended; confirm before use + + +if __name__ == "__main__": + print(find_packages()) + diff -r 1a4d3923aa9f -r 9fe74bd23af2 images/dynamicScriptTool.png Binary file images/dynamicScriptTool.png has changed diff -r 1a4d3923aa9f -r 9fe74bd23af2 rgToolFactory2.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rgToolFactory2.py Mon Mar 02 05:18:21 2015 -0500 @@ -0,0 +1,962 @@ +# rgToolFactoryMultIn.py +# see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home +# +# copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012 +# +# all rights reserved +# Licensed under the LGPL +# suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home +# +# January 2015 +# unified all setups by passing the script on the cl rather than via a PIPE - no need for treat_bash_special so removed +# +# in the process of building a complex tool +# added ability to choose one of the current toolshed package_r or package_perl or package_python dependencies and source that package +# add that package to tool_dependencies +# Note that once the generated tool is loaded, it will have that package's env.sh loaded automagically so there is no +# --envshpath in the parameters for the generated tool and it uses the system one which will be first on the adjusted path. 
+# +# sept 2014 added additional params from +# https://bitbucket.org/mvdbeek/dockertoolfactory/src/d4863bcf7b521532c7e8c61b6333840ba5393f73/DockerToolFactory.py?at=default +# passing them is complex +# and they are restricted to NOT contain commas or double quotes to ensure that they can be safely passed together on +# the toolfactory command line as a comma delimited double quoted string for parsing and passing to the script +# see examples on this tool form + +# august 2014 + +# Allows arbitrary number of input files +# NOTE positional parameters are now passed to script +# and output (may be "None") is *before* arbitrary number of inputs +# +# march 2014 +# had to remove dependencies because cross toolshed dependencies are not possible - can't pre-specify a toolshed url for graphicsmagick and ghostscript +# grrrrr - night before a demo +# added dependencies to a tool_dependencies.xml if html page generated so generated tool is properly portable +# +# added ghostscript and graphicsmagick as dependencies +# fixed a wierd problem where gs was trying to use the new_files_path from universe (database/tmp) as ./database/tmp +# errors ensued +# +# august 2013 +# found a problem with GS if $TMP or $TEMP missing - now inject /tmp and warn +# +# july 2013 +# added ability to combine images and individual log files into html output +# just make sure there's a log file foo.log and it will be output +# together with all images named like "foo_*.pdf +# otherwise old format for html +# +# January 2013 +# problem pointed out by Carlos Borroto +# added escaping for <>$ - thought I did that ages ago... +# +# August 11 2012 +# changed to use shell=False and cl as a sequence + +# This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye. +# It also serves as the wrapper for the new tool. +# +# you paste and run your script +# Only works for simple scripts that read one input from the history. 
+# Optionally can write one new history dataset, +# and optionally collect any number of outputs into links on an autogenerated HTML page. + +# DO NOT install on a public or important site - please. + +# installed generated tools are fine if the script is safe. +# They just run normally and their user cannot do anything unusually insecure +# but please, practice safe toolshed. +# Read the fucking code before you install any tool +# especially this one + +# After you get the script working on some test data, you can +# optionally generate a toolshed compatible gzip file +# containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for +# safe and largely automated installation in a production Galaxy. + +# If you opt for an HTML output, you get all the script outputs arranged +# as a single Html history item - all output files are linked, thumbnails for all the pdfs. +# Ugly but really inexpensive. +# +# Patches appreciated please. +# +# +# long route to June 2012 product +# Behold the awesome power of Galaxy and the toolshed with the tool factory to bind them +# derived from an integrated script model +# called rgBaseScriptWrapper.py +# Note to the unwary: +# This tool allows arbitrary scripting on your Galaxy as the Galaxy user +# There is nothing stopping a malicious user doing whatever they choose +# Extremely dangerous!! +# Totally insecure. So, trusted users only +# +# preferred model is a developer using their throw away workstation instance - ie a private site. +# no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool. 
+# + +import sys +import shutil +import subprocess +import os +import time +import tempfile +import optparse +import tarfile +import re +import shutil +import math + +progname = os.path.split(sys.argv[0])[1] +myversion = 'V001.1 March 2014' +verbose = False +debug = False +toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory' + +# if we do html we need these dependencies specified in a tool_dependencies.xml file and referred to in the generated +# tool xml + +def timenow(): + """return current time as a string + """ + return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) + +def quote_non_numeric(s): + """return a prequoted string for non-numerics + useful for perl and Rscript parameter passing? + """ + try: + res = float(s) + return s + except ValueError: + return '"%s"' % s + +html_escape_table = { + "&": "&", + ">": ">", + "<": "<", + "$": "\$" + } + +def html_escape(text): + """Produce entities within text.""" + return "".join(html_escape_table.get(c,c) for c in text) + + +def html_unescape(text): + """Revert entities within text.""" + t = text.replace('&','&').replace('>','>').replace('<','<').replace('\$','$') + return t + +def cmd_exists(cmd): + return subprocess.call("type " + cmd, shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0 + +def parse_citations(citations_text): + """ + """ + citations = [c for c in citations_text.split("**ENTRY**") if c.strip()] + citation_tuples = [] + for citation in citations: + if citation.startswith("doi"): + citation_tuples.append( ("doi", citation[len("doi"):].strip() ) ) + else: + citation_tuples.append( ("bibtex", citation[len("bibtex"):].strip() ) ) + return citation_tuples + +def shell_source(script): + """need a way to source a Galaxy tool interpreter env.sh to point at the right dependency package + This based on the idea in http://pythonwise.blogspot.fr/2010/04/sourcing-shell-script.html + Note that we have to finesse any wierdly quoted newlines in automagic exports using 
nulls (env -0) as newlines""" + pipe = subprocess.Popen("env -i ; . %s ; env -0" % script, stdout=subprocess.PIPE, shell=True) + output = pipe.communicate()[0] + outl = output.split('\0') + outl = [x for x in outl if len(x.split("=")) == 2] + newenv = dict((line.split("=", 1) for line in outl)) + os.environ.update(newenv) + +class ScriptRunner: + """class is a wrapper for an arbitrary script + note funky templating. this should all be done proper. + Problem is, this kludge developed quite naturally and seems to work ok with + little overhead... + + """ + + + def __init__(self,opts=None): + """ + cleanup inputs, setup some outputs + + """ + + self.toolhtmldepinterpskel = """ + + + + + + + + + + + + + %(readme)s + This file was autogenerated by the Galaxy Tool Factory 2 + + + """ + + self.toolhtmldepskel = """ + + + + + + + + + %(readme)s + This file was autogenerated by the Galaxy Tool Factory 2 + + + """ + + self.emptytoolhtmldepskel = """ + + + %(readme)s + This file was autogenerated by the Galaxy Tool Factory 2 + + + """ + + self.protorequirements = """ + ghostscript + graphicsmagick + """ + + self.protorequirements_interpreter = """ + ghostscript + graphicsmagick + %(interpreter_name)s + """ + + + self.newCommand=""" + %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s" + --tool_name "%(toolname)s" + %(command_inputs)s + %(command_outputs)s + """ + + self.tooltestsTabOnly = """ + + %(test1Inputs)s + + + + %(additionalParams)s + + """ + + self.tooltestsHTMLOnly = """ + + %(test1Inputs)s + + + %(additionalParams)s + + + """ + + self.tooltestsBoth = """ + + %(test1Inputs)s + + + %(additionalParams)s + + + + """ + + self.newXML=""" +%(tooldesc)s +%(requirements)s + +%(command)s + + +%(inputs)s +%(additionalInputs)s + + +%(outputs)s + + + +%(script)s + + + +%(tooltests)s + + + +%(help)s + +This tool was autogenerated from a user provided script using the Galaxy Tool Factory 2 +https://toolshed.g2.bx.psu.edu/view/fubar/tool_factory_2 + + + 
%(citations)s + 10.1093/bioinformatics/bts573 + +""" + + self.useGM = cmd_exists('gm') + self.useIM = cmd_exists('convert') + self.useGS = cmd_exists('gs') + self.temp_warned = False # we want only one warning if $TMP not set + if opts.output_dir: # simplify for the tool tarball + os.chdir(opts.output_dir) + self.thumbformat = 'png' + self.opts = opts + self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but.. + self.toolid = self.toolname + self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later + self.pyfile = self.myname # crude but efficient - the cruft won't hurt much + self.xmlfile = '%s.xml' % self.toolname + rx = open(self.opts.script_path,'r').readlines() + rx = [x.rstrip() for x in rx] # remove pesky dos line endings if needed + self.script = '\n'.join(rx) + fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname,suffix=".%s" % (opts.interpreter)) + tscript = open(self.sfile,'w') # use self.sfile as script source for Popen + tscript.write(self.script) + tscript.close() + self.indentedScript = " %s" % '\n'.join([' %s' % html_escape(x) for x in rx]) # for restructured text in help + self.escapedScript = "%s" % '\n'.join([' %s' % html_escape(x) for x in rx]) + self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.toolname) + if opts.output_dir: # may not want these complexities + self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.toolname) + art = '%s.%s' % (self.toolname,opts.interpreter) + artpath = os.path.join(self.opts.output_dir,art) # need full path + artifact = open(artpath,'w') # use self.sfile as script source for Popen + artifact.write(self.script) + artifact.close() + self.cl = [] + self.html = [] + self.test1Inputs = [] # now a list + a = self.cl.append + a(opts.interpreter) + a(self.sfile) + # if multiple inputs - positional or need to distinguish them with cl params + if opts.input_tab: + tests = [] + for i,intab in 
enumerate(opts.input_tab): # if multiple, make tests + if intab.find(',') <> -1: + (gpath,uname) = intab.split(',') + else: + gpath = uname = intab + tests.append(os.path.basename(gpath)) + self.test1Inputs = '' % (','.join(tests)) + else: + self.test1Inputs = '' + # we always pass path,name pairs in using python optparse append + # but the command line has to be different + self.infile_paths = '' + self.infile_names = '' + if self.opts.input_tab: + self.infile_paths = ','.join([x.split(',')[0].strip() for x in self.opts.input_tab]) + self.infile_names = ','.join([x.split(',')[1].strip() for x in self.opts.input_tab]) + if self.opts.interpreter == 'python': + # yes, this is how additional parameters are always passed in python - to the TF itself and to + # scripts to avoid having unknown parameter names (yes, they can be parsed but...) on the command line + if self.opts.input_tab: + a('--inpaths=%s' % (self.infile_paths)) + a('--innames=%s' % (self.infile_names)) + if self.opts.output_tab: + a('--outpath=%s' % self.opts.output_tab) + for p in opts.additional_parameters: + p = p.replace('"','') + psplit = p.split(',') + param = html_unescape(psplit[0]) + value = html_unescape(psplit[1]) + a('%s="%s"' % (param,value)) + if (self.opts.interpreter == 'Rscript'): + # pass params on command line as expressions which the script evaluates - see sample + if self.opts.input_tab: + a('INPATHS="%s"' % self.infile_paths) + a('INNAMES="%s"' % self.infile_names) + if self.opts.output_tab: + a('OUTPATH="%s"' % self.opts.output_tab) + for p in opts.additional_parameters: + p = p.replace('"','') + psplit = p.split(',') + param = html_unescape(psplit[0]) + value = html_unescape(psplit[1]) + a('%s=%s' % (param,quote_non_numeric(value))) + if (self.opts.interpreter == 'perl'): + # pass positional params on command line - perl script needs to discombobulate the path/name lists + if self.opts.input_tab: + a('%s' % self.infile_paths) + a('%s' % self.infile_names) + if 
self.opts.output_tab: + a('%s' % self.opts.output_tab) + for p in opts.additional_parameters: + # followed by any additional name=value parameter pairs + p = p.replace('"','') + psplit = p.split(',') + param = html_unescape(psplit[0]) + value = html_unescape(psplit[1]) + a('%s=%s' % (param,quote_non_numeric(value))) + if self.opts.interpreter == 'sh' or self.opts.interpreter == 'bash': + # more is better - now move all params into environment AND drop on to command line. + self.cl.insert(0,'env') + if self.opts.input_tab: + self.cl.insert(1,'INPATHS=%s' % (self.infile_paths)) + self.cl.insert(2,'INNAMES=%s' % (self.infile_names)) + if self.opts.output_tab: + self.cl.insert(3,'OUTPATH=%s' % (self.opts.output_tab)) + a('OUTPATH=%s' % (self.opts.output_tab)) + # sets those environment variables for the script + # additional params appear in CL - yes, it's confusing + for i,p in enumerate(opts.additional_parameters): + psplit = p.split(',') + param = html_unescape(psplit[0]) + value = html_unescape(psplit[1]) + a('%s=%s' % (param,quote_non_numeric(value))) + self.cl.insert(4+i,'%s=%s' % (param,quote_non_numeric(value))) + self.interpreter_owner = 'SYSTEM' + self.interpreter_pack = 'SYSTEM' + self.interpreter_name = 'SYSTEM' + self.interpreter_version = 'SYSTEM' + self.interpreter_revision = 'SYSTEM' + if opts.envshpath <> 'system': # need to parse out details for our tool_dependency + try: # fragile - depends on common naming convention as at jan 2015 = package_[interp]_v0_v1_v2... = version v0.v1.v2.. 
is in play + # this ONLY happens at tool generation by an admin - the generated tool always uses the default of system so path is from local env.sh + packdetails = opts.envshpath.split(os.path.sep)[-4:-1] # eg ['fubar', 'package_r_3_1_1', '63cdb9b2234c'] + self.interpreter_owner = packdetails[0] + self.interpreter_pack = packdetails[1] + self.interpreter_name = packdetails[1].split('_')[1].upper() + self.interpreter_revision = packdetails[2] + self.interpreter_version = '.'.join(packdetails[1].split('_')[2:]) + except: + pass + self.outFormats = opts.output_format + self.inputFormats = opts.input_formats + self.test1Output = '%s_test1_output.xls' % self.toolname + self.test1HTML = '%s_test1_output.html' % self.toolname + + def makeXML(self): + """ + Create a Galaxy xml tool wrapper for the new script as a string to write out + fixme - use templating or something less fugly than this example of what we produce + + + a tabular file + + reverse.py --script_path "$runMe" --interpreter "python" + --tool_name "reverse" --input_tab "$input1" --output_tab "$output1" + + + + + + + + + + + +**What it Does** + +Reverse the columns in a tabular file + + + + + +# reverse order of columns in a tabular file +import sys +inp = sys.argv[1] +outp = sys.argv[2] +i = open(inp,'r') +o = open(outp,'w') +for row in i: + rs = row.rstrip().split('\t') + rs.reverse() + o.write('\t'.join(rs)) + o.write('\n') +i.close() +o.close() + + + + + + + """ + + # these templates need a dict with the right keys to match the parameters - outputs, help, code... 
+ + xdict = {} + xdict['additionalParams'] = '' + xdict['additionalInputs'] = '' + if self.opts.additional_parameters: + if self.opts.edit_additional_parameters: # add to new tool form with default value set to original value + xdict['additionalInputs'] = '\n'.join(['' % \ + (x.split(',')[0],html_escape(x.split(',')[1]),html_escape(x.split(',')[2]),html_escape(x.split(',')[3]), x.split(',')[4]) for x in self.opts.additional_parameters]) + xdict['additionalParams'] = '\n'.join(['' % (x.split(',')[0],html_escape(x.split(',')[1])) for x in self.opts.additional_parameters]) + xdict['interpreter_owner'] = self.interpreter_owner + xdict['interpreter_version'] = self.interpreter_version + xdict['interpreter_pack'] = self.interpreter_pack + xdict['interpreter_name'] = self.interpreter_name + xdict['requirements'] = '' + if self.opts.include_dependencies == "yes": + if self.opts.envshpath <> 'system': + xdict['requirements'] = self.protorequirements_interpreter % xdict + else: + xdict['requirements'] = self.protorequirements + xdict['tool_version'] = self.opts.tool_version + xdict['test1HTML'] = self.test1HTML + xdict['test1Output'] = self.test1Output + xdict['test1Inputs'] = self.test1Inputs + if self.opts.make_HTML and self.opts.output_tab: + xdict['tooltests'] = self.tooltestsBoth % xdict + elif self.opts.make_HTML: + xdict['tooltests'] = self.tooltestsHTMLOnly % xdict + else: + xdict['tooltests'] = self.tooltestsTabOnly % xdict + xdict['script'] = self.escapedScript + # configfile is least painful way to embed script to avoid external dependencies + # but requires escaping of <, > and $ to avoid Mako parsing + if self.opts.help_text: + helptext = open(self.opts.help_text,'r').readlines() + helptext = [html_escape(x) for x in helptext] # must html escape here too - thanks to Marius van den Beek + xdict['help'] = ''.join([x for x in helptext]) + else: + xdict['help'] = 'Please ask the tool author (%s) for help as none was supplied at tool generation\n' % 
(self.opts.user_email) + coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::'] + coda.append('\n') + coda.append(self.indentedScript) + coda.append('\n**Attribution**\nThis Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.\n' % (self.opts.user_email,timenow())) + coda.append('See %s for details of that project' % (toolFactoryURL)) + coda.append('Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team. ') + coda.append('Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573\n') + xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda)) + if self.opts.tool_desc: + xdict['tooldesc'] = '%s' % self.opts.tool_desc + else: + xdict['tooldesc'] = '' + xdict['command_outputs'] = '' + xdict['outputs'] = '' + if self.opts.input_tab: + cins = ['\n',] + cins.append('--input_formats %s' % self.opts.input_formats) + cins.append('#for intab in $input1:') + cins.append('--input_tab "${intab},${intab.name}"') + cins.append('#end for\n') + xdict['command_inputs'] = '\n'.join(cins) + xdict['inputs'] = ''' \n''' % (self.inputFormats,self.inputFormats) + else: + xdict['command_inputs'] = '' # assume no input - eg a random data generator + xdict['inputs'] = '' + if (len(self.opts.additional_parameters) > 0): + cins = ['\n',] + for params in self.opts.additional_parameters: + psplit = params.split(',') # name,value... 
+ psplit[3] = html_escape(psplit[3]) + if self.opts.edit_additional_parameters: + psplit[1] = '$%s' % psplit[0] # replace with form value + else: + psplit[1] = html_escape(psplit[1]) # leave prespecified value + cins.append('--additional_parameters """%s"""' % ','.join(psplit)) + xdict['command_inputs'] = '%s\n%s' % (xdict['command_inputs'],'\n'.join(cins)) + xdict['inputs'] += ' \n' % self.toolname + xdict['toolname'] = self.toolname + xdict['toolid'] = self.toolid + xdict['interpreter'] = self.opts.interpreter + xdict['scriptname'] = self.sfile + if self.opts.make_HTML: + xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"' + xdict['outputs'] += ' \n' + else: + xdict['command_outputs'] += ' --output_dir "./"' + if self.opts.output_tab: + xdict['command_outputs'] += ' --output_tab "$output1"' + xdict['outputs'] += ' \n' % self.outFormats + xdict['command'] = self.newCommand % xdict + if self.opts.citations: + citationstext = open(self.opts.citations,'r').read() + citation_tuples = parse_citations(citationstext) + citations_xml = "" + for citation_type, citation_content in citation_tuples: + citation_xml = """%s""" % (citation_type, html_escape(citation_content)) + citations_xml += citation_xml + xdict['citations'] = citations_xml + else: + xdict['citations'] = "" + xmls = self.newXML % xdict + xf = open(self.xmlfile,'w') + xf.write(xmls) + xf.write('\n') + xf.close() + # ready for the tarball + + + def makeTooltar(self): + """ + a tool is a gz tarball with eg + /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ... + """ + retval = self.run() + if retval: + print >> sys.stderr,'## Run failed. Cannot build yet. 
Please fix and retry' + sys.exit(1) + tdir = self.toolname + os.mkdir(tdir) + self.makeXML() + if self.opts.help_text: + hlp = open(self.opts.help_text,'r').read() + else: + hlp = 'Please ask the tool author for help as none was supplied at tool generation\n' + readme_dict = {'readme':hlp,'interpreter':self.opts.interpreter,'interpreter_version':self.interpreter_version,'interpreter_name':self.interpreter_name, + 'interpreter_owner':self.interpreter_owner,'interpreter_pack':self.interpreter_pack} + if self.opts.include_dependencies == "yes": + if self.opts.envshpath == 'system': + tooldepcontent = self.toolhtmldepskel % readme_dict + else: + tooldepcontent = self.toolhtmldepinterpskel % readme_dict + else: + tooldepcontent = self.emptytoolhtmldepskel % readme_dict + depf = open(os.path.join(tdir,'tool_dependencies.xml'),'w') + depf.write(tooldepcontent) + depf.write('\n') + depf.close() + testdir = os.path.join(tdir,'test-data') + os.mkdir(testdir) # make tests directory + for i,intab in enumerate(self.opts.input_tab): + si = self.opts.input_tab[i] + if si.find(',') <> -1: + s = si.split(',')[0] + si = s + dest = os.path.join(testdir,os.path.basename(si)) + if si <> dest: + shutil.copyfile(si,dest) + if self.opts.output_tab: + shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output)) + if self.opts.make_HTML: + shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML)) + if self.opts.output_dir: + shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log')) + outpif = '%s.py' % self.toolname # new name + outpiname = os.path.join(tdir,outpif) # path for the tool tarball + pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM) + notes = ['# %s - a self annotated version of %s generated by running %s\n' % (outpiname,pyin,pyin),] + notes.append('# to make a new Galaxy tool called %s\n' % self.toolname) + notes.append('# User %s at %s\n' % (self.opts.user_email,timenow())) + pi = 
open(self.pyfile,'r').readlines() # our code becomes new tool wrapper (!) - first Galaxy worm + notes += pi + outpi = open(outpiname,'w') + outpi.write(''.join(notes)) + outpi.write('\n') + outpi.close() + stname = os.path.join(tdir,self.sfile) + if not os.path.exists(stname): + shutil.copyfile(self.sfile, stname) + xtname = os.path.join(tdir,self.xmlfile) + if not os.path.exists(xtname): + shutil.copyfile(self.xmlfile,xtname) + tarpath = "%s.tar.gz" % self.toolname + tar = tarfile.open(tarpath, "w:gz") + tar.add(tdir,arcname='%s' % self.toolname) + tar.close() + shutil.copyfile(tarpath,self.opts.new_tool) + shutil.rmtree(tdir) + ## TODO: replace with optional direct upload to local toolshed? + return retval + + + def compressPDF(self,inpdf=None,thumbformat='png'): + """need absolute path to pdf + note that GS gets confoozled if no $TMP or $TEMP + so we set it + """ + assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName) + hlog = os.path.join(self.opts.output_dir,"compress_%s.txt" % os.path.basename(inpdf)) + sto = open(hlog,'a') + our_env = os.environ.copy() + our_tmp = our_env.get('TMP',None) + if not our_tmp: + our_tmp = our_env.get('TEMP',None) + if not (our_tmp and os.path.exists(our_tmp)): + newtmp = os.path.join(self.opts.output_dir,'tmp') + try: + os.mkdir(newtmp) + except: + sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp) + our_env['TEMP'] = newtmp + if not self.temp_warned: + sto.write('## WARNING - no $TMP or $TEMP!!! 
Please fix - using %s temporarily\n' % newtmp) + self.temp_warned = True + outpdf = '%s_compressed' % inpdf + cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH","-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf,inpdf] + x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env) + retval1 = x.wait() + sto.close() + if retval1 == 0: + os.unlink(inpdf) + shutil.move(outpdf,inpdf) + os.unlink(hlog) + hlog = os.path.join(self.opts.output_dir,"thumbnail_%s.txt" % os.path.basename(inpdf)) + sto = open(hlog,'w') + outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat) + if self.useGM: + cl2 = ['gm', 'convert', inpdf, outpng] + else: # assume imagemagick + cl2 = ['convert', inpdf, outpng] + x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env) + retval2 = x.wait() + sto.close() + if retval2 == 0: + os.unlink(hlog) + retval = retval1 or retval2 + return retval + + + def getfSize(self,fpath,outpath): + """ + format a nice file size string + """ + size = '' + fp = os.path.join(outpath,fpath) + if os.path.isfile(fp): + size = '0 B' + n = float(os.path.getsize(fp)) + if n > 2**20: + size = '%1.1f MB' % (n/2**20) + elif n > 2**10: + size = '%1.1f KB' % (n/2**10) + elif n > 0: + size = '%d B' % (int(n)) + return size + + def makeHtml(self): + """ Create an HTML file content to list all the artifacts found in the output_dir + """ + + galhtmlprefix = """ + + + + + + + +
+ """ + galhtmlattr = """
This tool (%s) was generated by the Galaxy Tool Factory

""" + galhtmlpostfix = """
\n""" + + flist = os.listdir(self.opts.output_dir) + flist = [x for x in flist if x <> 'Rplots.pdf'] + flist.sort() + html = [] + html.append(galhtmlprefix % progname) + html.append('
Galaxy Tool "%s" run at %s

' % (self.toolname,timenow())) + fhtml = [] + if len(flist) > 0: + logfiles = [x for x in flist if x.lower().endswith('.log')] # log file names determine sections + logfiles.sort() + logfiles = [x for x in logfiles if os.path.abspath(x) <> os.path.abspath(self.tlog)] + logfiles.append(os.path.abspath(self.tlog)) # make it the last one + pdflist = [] + npdf = len([x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']) + for rownum,fname in enumerate(flist): + dname,e = os.path.splitext(fname) + sfsize = self.getfSize(fname,self.opts.output_dir) + if e.lower() == '.pdf' : # compress and make a thumbnail + thumb = '%s.%s' % (dname,self.thumbformat) + pdff = os.path.join(self.opts.output_dir,fname) + retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat) + if retval == 0: + pdflist.append((fname,thumb)) + else: + pdflist.append((fname,fname)) + if (rownum+1) % 2 == 0: + fhtml.append('%s%s' % (fname,fname,sfsize)) + else: + fhtml.append('%s%s' % (fname,fname,sfsize)) + for logfname in logfiles: # expect at least tlog - if more + if os.path.abspath(logfname) == os.path.abspath(self.tlog): # handled later + sectionname = 'All tool run' + if (len(logfiles) > 1): + sectionname = 'Other' + ourpdfs = pdflist + else: + realname = os.path.basename(logfname) + sectionname = os.path.splitext(realname)[0].split('_')[0] # break in case _ added to log + ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname] + pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] <> sectionname] # remove + nacross = 1 + npdf = len(ourpdfs) + + if npdf > 0: + nacross = math.sqrt(npdf) ## int(round(math.log(npdf,2))) + if int(nacross)**2 != npdf: + nacross += 1 + nacross = int(nacross) + width = min(400,int(1200/nacross)) + html.append('
%s images and outputs
' % sectionname) + html.append('(Click on a thumbnail image to download the corresponding original PDF image)
') + ntogo = nacross # counter for table row padding with empty cells + html.append('
\n') + for i,paths in enumerate(ourpdfs): + fname,thumb = paths + s= """\n""" % (fname,thumb,fname,width,fname) + if ((i+1) % nacross == 0): + s += '\n' + ntogo = 0 + if i < (npdf - 1): # more to come + s += '' + ntogo = nacross + else: + ntogo -= 1 + html.append(s) + if html[-1].strip().endswith(''): + html.append('
Image called %s
\n') + else: + if ntogo > 0: # pad + html.append(' '*ntogo) + html.append('\n') + logt = open(logfname,'r').readlines() + logtext = [x for x in logt if x.strip() > ''] + html.append('
%s log output
' % sectionname) + if len(logtext) > 1: + html.append('\n
\n')
+                    html += logtext
+                    html.append('\n
\n') + else: + html.append('%s is empty
' % logfname) + if len(fhtml) > 0: + fhtml.insert(0,'
\n') + fhtml.append('
Output File Name (click to view)Size

') + html.append('
All output files available for downloading
\n') + html += fhtml # add all non-pdf files to the end of the display + else: + html.append('
### Error - %s returned no files - please confirm that parameters are sane
' % self.opts.interpreter) + html.append(galhtmlpostfix) + htmlf = file(self.opts.output_html,'w') + htmlf.write('\n'.join(html)) + htmlf.write('\n') + htmlf.close() + self.html = html + + + + def run(self): + """ + Some devteam tools have this defensive stderr read so I'm keeping with the faith + Feel free to update. + """ + if self.opts.envshpath <> 'system': + shell_source(self.opts.envshpath) + # this only happens at tool generation - the generated tool relies on the dependencies all being set up + # at toolshed installation by sourcing local env.sh + if self.opts.output_dir: + ste = open(self.elog,'wb') + sto = open(self.tlog,'wb') + s = ' '.join(self.cl) + sto.write('## Executing Toolfactory generated command line = %s\n' % s) + sto.flush() + p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=ste,cwd=self.opts.output_dir) + retval = p.wait() + sto.close() + ste.close() + tmp_stderr = open( self.elog, 'rb' ) + err = '' + buffsize = 1048576 + try: + while True: + err += tmp_stderr.read( buffsize ) + if not err or len( err ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stderr.close() + else: + p = subprocess.Popen(self.cl,shell=False) + retval = p.wait() + if self.opts.output_dir: + if retval <> 0 and err: # problem + print >> sys.stderr,err + if self.opts.make_HTML: + self.makeHtml() + return retval + + + +def main(): + u = """ + This is a Galaxy wrapper. 
It expects to be called by a special purpose tool.xml as: + rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript" + + """ + op = optparse.OptionParser() + a = op.add_option + a('--script_path',default=None) + a('--tool_name',default=None) + a('--interpreter',default=None) + a('--output_dir',default='./') + a('--output_html',default=None) + a('--input_tab',default=[], action="append") # these are "galaxypath,metadataname" pairs + a("--input_formats",default="tabular") + a('--output_tab',default=None) + a('--output_format',default='tabular') + a('--user_email',default='Unknown') + a('--bad_user',default=None) + a('--make_Tool',default=None) + a('--make_HTML',default=None) + a('--help_text',default=None) + a('--tool_desc',default=None) + a('--new_tool',default=None) + a('--tool_version',default=None) + a('--include_dependencies',default=None) + a('--citations',default=None) + a('--additional_parameters', dest='additional_parameters', action='append', default=[]) + a('--edit_additional_parameters', action="store_true", default=False) + a('--envshpath',default="system") + opts, args = op.parse_args() + assert not opts.bad_user,'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user,opts.bad_user) + assert opts.tool_name,'## Tool Factory expects a tool name - eg --tool_name=DESeq' + assert opts.interpreter,'## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript' + assert os.path.isfile(opts.script_path),'## Tool Factory wrapper expects a script path - eg --script_path=foo.R' + if opts.output_dir: + try: + os.makedirs(opts.output_dir) + except: + pass + opts.input_tab = [x.replace('"','').replace("'",'') for x in opts.input_tab] + for i,x in enumerate(opts.additional_parameters): # remove quotes we need to deal with spaces in CL params + opts.additional_parameters[i] = opts.additional_parameters[i].replace('"','') + r = ScriptRunner(opts) 
+ if opts.make_Tool: + retcode = r.makeTooltar() + else: + retcode = r.run() + os.unlink(r.sfile) + if retcode: + sys.exit(retcode) # indicate failure to job runner + + +if __name__ == "__main__": + main() + + diff -r 1a4d3923aa9f -r 9fe74bd23af2 rgToolFactory2.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rgToolFactory2.xml Mon Mar 02 05:18:21 2015 -0500 @@ -0,0 +1,458 @@ + + Scripts into tools + + ghostscript + graphicsmagick + + + +#import os +#set dev_env = os.environ.get('GALAXY_DEVELOPMENT_ENVIRONMENT', '0') == '1' +#if not $dev_env and ( $__user_email__ not in $__admin_users__ ): + rgToolFactory2.py --bad_user $__user_email__ +#else: + rgToolFactory2.py --script_path "$runme" --interpreter "$interp.interpreter" + --tool_name "$tool_name" --user_email "$__user_email__" --citations "$citeme" + --envshpath "$interp.envpath" + + #if $make_TAB.value=="yes": + --output_tab "$output1" + --output_format "$output_format" + #end if + #if $makeMode.make_Tool=="yes": + --make_Tool "$makeMode.make_Tool" + --tool_desc "$makeMode.tool_desc" + --tool_version "$makeMode.tool_version" + --new_tool "$new_tool" + --help_text "$helpme" + #if $make_HTML.value=="yes": + #if $makeMode.include_deps.value=="yes": + --include_dependencies "yes" + #end if + #end if + #end if + #if $additional_parameters != 'None': + #if $edit_params.value == "yes": + --edit_additional_parameters + #end if + #for i in $additional_parameters: + --additional_parameters "$i.param_name,$i.param_value,$i.param_label,$i.param_help,$i.param_type" + #end for + #end if + #if $make_HTML.value=="yes": + --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" + #else: + --output_dir "." 
+ #end if + #if len($input_tab) != 0: + --input_formats "$input_formats" + #for $intab in $input_tab: + #if $intab.ext != 'data': + --input_tab "${intab},${intab.name}" + #end if + #end for + #end if +#end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + make_TAB=="yes" + + + + + + + make_HTML == "yes" + + + makeMode['make_Tool'] == "yes" + + + +$dynScript + +#if $makeMode.make_Tool == "yes": +${makeMode.help_text} +#end if + + +#if $makeMode.make_Tool == "yes": +#for $citation in $makeMode.citations: +#if $citation.citation_type.type == "bibtex": +**ENTRY**bibtex +${citation.citation_type.bibtex} +#else +**ENTRY**doi +${citation.citation_type.doi} +#end if +#end for +#end if + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: warningmark + +**Details and attribution** +(see GTF_) + +**Local Admins ONLY** +Only users whose IDs found in the local admin_user configuration setting in universe_wsgi.ini can run this tool. + +**If you find a bug** +Please raise an issue at the bitbucket repository GTFI_ + +**What it does** +This tool enables a user to paste and submit an arbitrary R/python/perl script to Galaxy passing +any number of input files and their metadata names to the script for use in (eg) summaries or reports. +This allows quick generation of tools capable of taking some arbitrary number of user selected inputs +for the "reduce" step of a map-reduce HPC model. + +**Input options** +This version allows multiple input file selected from the history - their paths and metadata names +are provided to your script - see the examples below for each scripting language. Python uses argparse. +Rscript uses some code to create all the command line variables. 
Parameters are injected into the bash/sh +execution environment so they magically "appear" as $ prefixed variables and will also be found on the +command line as parameters to be parsed if that's your preference. +Note that additional parameters are NOT presented to the user of the generated tool but are frozen with the script. +If there's demand, it would not be too hard to add them to the generated script... + +**Output options** +Optional script outputs include one single new history tabular file and for scripts that create multiple file reports +or analyses, an Html page linking every file and image created by the script can be automatically generated. + +**Tool Generation option** +Once the script is working with test data, this tool will optionally generate a new Galaxy tool in a Tool Shed (gzip) repository file +ready to upload to your local toolshed for sharing and installation. +Provide a small sample input when you generate the tool because it will become the input for the generated functional test. + +.. class:: warningmark + +**Note to system administrators** +This tool offers *NO* built-in protection against malicious scripts. It should only be installed on private/personal Galaxy instances. +Admin_users will have the power to do anything they want as the Galaxy user if you install this tool. + +.. class:: warningmark + +**Use on public servers** is STRONGLY discouraged for obvious reasons + +The tools generated by this tool will run just as securely as any other normal installed Galaxy tool but like any other new tools, should always be checked carefully before installation. +We recommend that you follow the good code hygiene practices associated with safe toolshed. + +**Scripting conventions** The pasted script will be executed with the path to the (optional) input tabular data file or NONE if you do not select one, and the path to the optional +output file or None if none is wanted, as the first and second command line parameters. 
The script must deal appropriately with these - see Rscript examples below. +Note that if an optional HTML output is selected, all the output files created by the script will be nicely presented as links, with pdf images linked as thumbnails in that output. +This can be handy for complex scripts creating lots of output. + +> "$OUTPATH" + echo "INNAMES=$INNAMES" >> "$OUTPATH" + echo "OUTPATH=$OUTPATH" >> "$OUTPATH" + echo "CL=$@" >> "$OUTPATH" + +***perl***:: + + (my $INPATHS,my $INNAMES,my $OUTPATH ) = @ARGV; + open(my $fh, '>', $OUTPATH) or die "Could not open file '$OUTPATH' $!"; + print $fh "INPATHS=$INPATHS\n INNAMES=$INNAMES\n OUTPATH=$OUTPATH\n"; + close $fh; + + +]]> + + + +Paper_ : + +Creating re-usable tools from scripts: The Galaxy Tool Factory +Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team +Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573 + +**Licensing** + +Copyright Ross Lazarus (ross period lazarus at gmail period com) May 2012 +All rights reserved. +Licensed under the LGPL_ + +.. _LGPL: http://www.gnu.org/copyleft/lesser.html +.. _GTF: https://bitbucket.org/fubar/galaxytoolfactory +.. _GTFI: https://bitbucket.org/fubar/galaxytoolfactory/issues +.. _Paper: http://bioinformatics.oxfordjournals.org/cgi/reprint/bts573 + + + + + 10.1093/bioinformatics/bts573 + + + + diff -r 1a4d3923aa9f -r 9fe74bd23af2 test-data/tf2_test.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tf2_test.html Mon Mar 02 05:18:21 2015 -0500 @@ -0,0 +1,25 @@ + + + + + + + + +
+ +
Galaxy Tool "tf2_test" run at 29/01/2015 11:24:54

+
tf2 log output
+tf2_test_error.log is empty
+
Other log output
+/tmp/tmpdRM3H0/job_working_directory/000/2/dataset_3_files/tf2_test_runner.log is empty
+
All output files available for downloading
+ +
+ + + + +
Output File Name (click to view)Size
tf2_test.python0 B
tf2_test_error.log0 B
tf2_test_runner.log226 B

+
+ diff -r 1a4d3923aa9f -r 9fe74bd23af2 test-data/tf2_test.toolshed.gz Binary file test-data/tf2_test.toolshed.gz has changed diff -r 1a4d3923aa9f -r 9fe74bd23af2 test-data/tf2_test_in.xls --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tf2_test_in.xls Mon Mar 02 05:18:21 2015 -0500 @@ -0,0 +1,3 @@ +col1 col2 col3 +r11 r12 r13 +r21 r22 r23 diff -r 1a4d3923aa9f -r 9fe74bd23af2 test-data/tf2_test_out.xls diff -r 1a4d3923aa9f -r 9fe74bd23af2 test-data/tf2_test_runme.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tf2_test_runme.py Mon Mar 02 05:18:21 2015 -0500 @@ -0,0 +1,25 @@ +### bog standard argparse for 3 possible comma separated lists +## followed by some silly reverse each row code provided as an example +## you're supposed to replace it with your great code.. +import sys +import argparse +import copy +argp=argparse.ArgumentParser() +argp.add_argument('--INNAMES',default=None) +argp.add_argument('--INPATHS',default=None) +argp.add_argument('--OUTPATH',default=None) +argp.add_argument('--additional_parameters',default=[],action="append") +argp.add_argument('otherargs', nargs=argparse.REMAINDER) +args = argp.parse_args() +fout = open(args.OUTPATH,'w') +sins = open(args.INPATHS.split(',')[0]).readlines() +for i,sin in enumerate(sins): + row = sin.strip().split('\t') + rrow = copy.copy(row) + lrow = len(row) + if (lrow > 1): + for j in range(lrow): + rrow[j] = row[lrow-j-1] + fout.write('\t'.join(rrow)) + fout.write('\n') +fout.close() diff -r 1a4d3923aa9f -r 9fe74bd23af2 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Mar 02 05:18:21 2015 -0500 @@ -0,0 +1,12 @@ + + + + + + + + + + Only Admins can use this tool generator but please do NOT install on a public facing Galaxy as it exposes unrestricted scripting as your Galaxy user + +