Previous changeset 6:64e67f172188 (2013-11-21) Next changeset 8:09a8be9247ca (2016-01-09) |
Commit message:
v0.1.1 fix typo; v0.1.0 BED output (Eric Rasche), NCBI genetic code 24; v0.0.7 embeds citation |
modified:
tools/get_orfs_or_cdss/README.rst tools/get_orfs_or_cdss/get_orfs_or_cdss.py tools/get_orfs_or_cdss/get_orfs_or_cdss.xml tools/get_orfs_or_cdss/tool_dependencies.xml |
added:
test-data/get_orf_input.Suis_ORF.bed test-data/get_orf_input.t11_bed_out.bed test-data/get_orf_input.t11_open_bed_out.bed test-data/get_orf_input.t1_bed_out.bed |
b |
diff -r 64e67f172188 -r 705a2e2df7fb test-data/get_orf_input.Suis_ORF.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/get_orf_input.Suis_ORF.bed Thu Jul 30 12:35:31 2015 -0400 |
b |
b'@@ -0,0 +1,2910 @@\n+Streptococcus_suis\t0\t1374\tStreptococcus_suis|ORF1\t0\t+\n+Streptococcus_suis\t1506\t2664\tStreptococcus_suis|ORF2\t0\t+\n+Streptococcus_suis\t1706\t2021\tStreptococcus_suis|ORF3\t0\t-\n+Streptococcus_suis\t2755\t3637\tStreptococcus_suis|ORF4\t0\t+\n+Streptococcus_suis\t3932\t4313\tStreptococcus_suis|ORF5\t0\t+\n+Streptococcus_suis\t4380\t5514\tStreptococcus_suis|ORF6\t0\t+\n+Streptococcus_suis\t4449\t4797\tStreptococcus_suis|ORF7\t0\t-\n+Streptococcus_suis\t4490\t4838\tStreptococcus_suis|ORF8\t0\t-\n+Streptococcus_suis\t5662\t6241\tStreptococcus_suis|ORF9\t0\t+\n+Streptococcus_suis\t6234\t9735\tStreptococcus_suis|ORF10\t0\t+\n+Streptococcus_suis\t6973\t7351\tStreptococcus_suis|ORF11\t0\t-\n+Streptococcus_suis\t8270\t8624\tStreptococcus_suis|ORF12\t0\t-\n+Streptococcus_suis\t10037\t10412\tStreptococcus_suis|ORF13\t0\t+\n+Streptococcus_suis\t10522\t11815\tStreptococcus_suis|ORF14\t0\t+\n+Streptococcus_suis\t11815\t13084\tStreptococcus_suis|ORF15\t0\t+\n+Streptococcus_suis\t13076\t13634\tStreptococcus_suis|ORF16\t0\t+\n+Streptococcus_suis\t13634\t15629\tStreptococcus_suis|ORF17\t0\t+\n+Streptococcus_suis\t13834\t14764\tStreptococcus_suis|ORF18\t0\t-\n+Streptococcus_suis\t15963\t16437\tStreptococcus_suis|ORF19\t0\t+\n+Streptococcus_suis\t17316\t17886\tStreptococcus_suis|ORF20\t0\t-\n+Streptococcus_suis\t19641\t19953\tStreptococcus_suis|ORF21\t0\t-\n+Streptococcus_suis\t20790\t21111\tStreptococcus_suis|ORF22\t0\t-\n+Streptococcus_suis\t21319\t21649\tStreptococcus_suis|ORF23\t0\t+\n+Streptococcus_suis\t23559\t24405\tStreptococcus_suis|ORF24\t0\t+\n+Streptococcus_suis\t23777\t24143\tStreptococcus_suis|ORF25\t0\t-\n+Streptococcus_suis\t24385\t24910\tStreptococcus_suis|ORF26\t0\t+\n+Streptococcus_suis\t24910\t26251\tStreptococcus_suis|ORF27\t0\t+\n+Streptococcus_suis\t26344\t27322\tStreptococcus_suis|ORF28\t0\t+\n+Streptococcus_suis\t27399\t28587\tStreptococcus_suis|ORF29\t0\t+\n+Streptococcus_suis\t28218\t28563\tStreptococcus_suis|ORF30\t0\t-\n+Str
eptococcus_suis\t28567\t29356\tStreptococcus_suis|ORF31\t0\t+\n+Streptococcus_suis\t28603\t28942\tStreptococcus_suis|ORF32\t0\t-\n+Streptococcus_suis\t29343\t30360\tStreptococcus_suis|ORF33\t0\t+\n+Streptococcus_suis\t30691\t31426\tStreptococcus_suis|ORF34\t0\t+\n+Streptococcus_suis\t31426\t35158\tStreptococcus_suis|ORF35\t0\t+\n+Streptococcus_suis\t32643\t33138\tStreptococcus_suis|ORF36\t0\t-\n+Streptococcus_suis\t34368\t34755\tStreptococcus_suis|ORF37\t0\t-\n+Streptococcus_suis\t34816\t35128\tStreptococcus_suis|ORF38\t0\t-\n+Streptococcus_suis\t35016\t36615\tStreptococcus_suis|ORF39\t0\t+\n+Streptococcus_suis\t36640\t37693\tStreptococcus_suis|ORF40\t0\t+\n+Streptococcus_suis\t37333\t37738\tStreptococcus_suis|ORF41\t0\t-\n+Streptococcus_suis\t37665\t38241\tStreptococcus_suis|ORF42\t0\t+\n+Streptococcus_suis\t38198\t38555\tStreptococcus_suis|ORF43\t0\t-\n+Streptococcus_suis\t38241\t39798\tStreptococcus_suis|ORF44\t0\t+\n+Streptococcus_suis\t38277\t38709\tStreptococcus_suis|ORF45\t0\t-\n+Streptococcus_suis\t39303\t39609\tStreptococcus_suis|ORF46\t0\t-\n+Streptococcus_suis\t39437\t39809\tStreptococcus_suis|ORF47\t0\t-\n+Streptococcus_suis\t39892\t40318\tStreptococcus_suis|ORF48\t0\t-\n+Streptococcus_suis\t39920\t41186\tStreptococcus_suis|ORF49\t0\t+\n+Streptococcus_suis\t40224\t40698\tStreptococcus_suis|ORF50\t0\t+\n+Streptococcus_suis\t40229\t40925\tStreptococcus_suis|ORF51\t0\t-\n+Streptococcus_suis\t41070\t41451\tStreptococcus_suis|ORF52\t0\t-\n+Streptococcus_suis\t41193\t41700\tStreptococcus_suis|ORF53\t0\t+\n+Streptococcus_suis\t41309\t41615\tStreptococcus_suis|ORF54\t0\t+\n+Streptococcus_suis\t41683\t42766\tStreptococcus_suis|ORF55\t0\t+\n+Streptococcus_suis\t41692\t42064\tStreptococcus_suis|ORF56\t0\t-\n+Streptococcus_suis\t42188\t42569\tStreptococcus_suis|ORF57\t0\t-\n+Streptococcus_suis\t42794\t43571\tStreptococcus_suis|ORF58\t0\t+\n+Streptococcus_suis\t43041\t43365\tStreptococcus_suis|ORF59\t0\t-\n+Streptococcus_suis\t43447\t43894\tStreptococcus_suis|ORF60\t
0\t-\n+Streptococcus_suis\t43619\t44870\tStreptococcus_suis|ORF61\t0\t+\n+Streptococcus_suis\t44859\t46164\tStreptococcus_suis|ORF62\t0\t+\n+Streptococcus_suis\t45179\t45524\tStreptococcus_suis|ORF63\t0\t-\n+Streptococcus_suis\t47040\t47829\tStreptococcus_suis|ORF64\t0\t+\n+Streptococcus_suis\t47829\t48417\tStreptococcus_suis|ORF65\t0\t+\n+Streptococcus_suis\t48296\t48860\tStreptococcus_suis|'..b'956142\t1956526\tStreptococcus_suis|ORF2851\t0\t+\n+Streptococcus_suis\t1956835\t1958599\tStreptococcus_suis|ORF2852\t0\t-\n+Streptococcus_suis\t1957087\t1957456\tStreptococcus_suis|ORF2853\t0\t+\n+Streptococcus_suis\t1958210\t1958606\tStreptococcus_suis|ORF2854\t0\t+\n+Streptococcus_suis\t1958681\t1959143\tStreptococcus_suis|ORF2855\t0\t-\n+Streptococcus_suis\t1959143\t1960055\tStreptococcus_suis|ORF2856\t0\t-\n+Streptococcus_suis\t1959625\t1959934\tStreptococcus_suis|ORF2857\t0\t+\n+Streptococcus_suis\t1960124\t1961171\tStreptococcus_suis|ORF2858\t0\t-\n+Streptococcus_suis\t1961182\t1963597\tStreptococcus_suis|ORF2859\t0\t-\n+Streptococcus_suis\t1963905\t1964370\tStreptococcus_suis|ORF2860\t0\t-\n+Streptococcus_suis\t1963943\t1964489\tStreptococcus_suis|ORF2861\t0\t-\n+Streptococcus_suis\t1964461\t1967398\tStreptococcus_suis|ORF2862\t0\t-\n+Streptococcus_suis\t1967597\t1968323\tStreptococcus_suis|ORF2863\t0\t+\n+Streptococcus_suis\t1968313\t1969627\tStreptococcus_suis|ORF2864\t0\t+\n+Streptococcus_suis\t1969665\t1971015\tStreptococcus_suis|ORF2865\t0\t-\n+Streptococcus_suis\t1971097\t1972630\tStreptococcus_suis|ORF2866\t0\t-\n+Streptococcus_suis\t1972747\t1973206\tStreptococcus_suis|ORF2867\t0\t-\n+Streptococcus_suis\t1973262\t1973877\tStreptococcus_suis|ORF2868\t0\t-\n+Streptococcus_suis\t1974226\t1974649\tStreptococcus_suis|ORF2869\t0\t+\n+Streptococcus_suis\t1974436\t1975804\tStreptococcus_suis|ORF2870\t0\t-\n+Streptococcus_suis\t1974991\t1975387\tStreptococcus_suis|ORF2871\t0\t+\n+Streptococcus_suis\t1975823\t1976297\tStreptococcus_suis|ORF2872\t0\t-\n+Streptococcus_s
uis\t1976286\t1978269\tStreptococcus_suis|ORF2873\t0\t-\n+Streptococcus_suis\t1978354\t1980301\tStreptococcus_suis|ORF2874\t0\t-\n+Streptococcus_suis\t1978597\t1979140\tStreptococcus_suis|ORF2875\t0\t+\n+Streptococcus_suis\t1979950\t1980460\tStreptococcus_suis|ORF2876\t0\t+\n+Streptococcus_suis\t1980267\t1980729\tStreptococcus_suis|ORF2877\t0\t-\n+Streptococcus_suis\t1980718\t1981180\tStreptococcus_suis|ORF2878\t0\t-\n+Streptococcus_suis\t1980808\t1981186\tStreptococcus_suis|ORF2879\t0\t+\n+Streptococcus_suis\t1981166\t1981703\tStreptococcus_suis|ORF2880\t0\t-\n+Streptococcus_suis\t1981512\t1981878\tStreptococcus_suis|ORF2881\t0\t+\n+Streptococcus_suis\t1981760\t1982099\tStreptococcus_suis|ORF2882\t0\t-\n+Streptococcus_suis\t1982179\t1982602\tStreptococcus_suis|ORF2883\t0\t+\n+Streptococcus_suis\t1982325\t1982631\tStreptococcus_suis|ORF2884\t0\t-\n+Streptococcus_suis\t1982644\t1983916\tStreptococcus_suis|ORF2885\t0\t-\n+Streptococcus_suis\t1983950\t1984655\tStreptococcus_suis|ORF2886\t0\t+\n+Streptococcus_suis\t1983992\t1984349\tStreptococcus_suis|ORF2887\t0\t-\n+Streptococcus_suis\t1984655\t1985540\tStreptococcus_suis|ORF2888\t0\t+\n+Streptococcus_suis\t1985996\t1986602\tStreptococcus_suis|ORF2889\t0\t-\n+Streptococcus_suis\t1986682\t1987480\tStreptococcus_suis|ORF2890\t0\t-\n+Streptococcus_suis\t1987469\t1988357\tStreptococcus_suis|ORF2891\t0\t-\n+Streptococcus_suis\t1988287\t1989124\tStreptococcus_suis|ORF2892\t0\t-\n+Streptococcus_suis\t1989120\t1989669\tStreptococcus_suis|ORF2893\t0\t-\n+Streptococcus_suis\t1989669\t1990539\tStreptococcus_suis|ORF2894\t0\t-\n+Streptococcus_suis\t1990608\t1991928\tStreptococcus_suis|ORF2895\t0\t-\n+Streptococcus_suis\t1991888\t1993154\tStreptococcus_suis|ORF2896\t0\t-\n+Streptococcus_suis\t1993212\t1993599\tStreptococcus_suis|ORF2897\t0\t+\n+Streptococcus_suis\t1993568\t1994696\tStreptococcus_suis|ORF2898\t0\t+\n+Streptococcus_suis\t1994967\t1996464\tStreptococcus_suis|ORF2899\t0\t-\n+Streptococcus_suis\t1996680\t1997781\tStrept
ococcus_suis|ORF2900\t0\t-\n+Streptococcus_suis\t1998012\t1998933\tStreptococcus_suis|ORF2901\t0\t+\n+Streptococcus_suis\t1998922\t2000620\tStreptococcus_suis|ORF2902\t0\t+\n+Streptococcus_suis\t1999704\t2000052\tStreptococcus_suis|ORF2903\t0\t-\n+Streptococcus_suis\t1999973\t2000306\tStreptococcus_suis|ORF2904\t0\t+\n+Streptococcus_suis\t2000501\t2000855\tStreptococcus_suis|ORF2905\t0\t+\n+Streptococcus_suis\t2000887\t2003506\tStreptococcus_suis|ORF2906\t0\t+\n+Streptococcus_suis\t2003906\t2004614\tStreptococcus_suis|ORF2907\t0\t-\n+Streptococcus_suis\t2004614\t2005157\tStreptococcus_suis|ORF2908\t0\t-\n+Streptococcus_suis\t2005222\t2006464\tStreptococcus_suis|ORF2909\t0\t+\n+Streptococcus_suis\t2006518\t2007289\tStreptococcus_suis|ORF2910\t0\t+\n' |
b |
diff -r 64e67f172188 -r 705a2e2df7fb test-data/get_orf_input.t11_bed_out.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/get_orf_input.t11_bed_out.bed Thu Jul 30 12:35:31 2015 -0400 |
b |
@@ -0,0 +1,6 @@ +alpha 67 331 alpha|CDS1 0 + +alpha 71 326 alpha|CDS2 0 + +alpha 75 336 alpha|CDS3 0 + +beta 68 332 beta|CDS1 0 + +beta 72 327 beta|CDS2 0 + +beta 76 337 beta|CDS3 0 + |
b |
diff -r 64e67f172188 -r 705a2e2df7fb test-data/get_orf_input.t11_open_bed_out.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/get_orf_input.t11_open_bed_out.bed Thu Jul 30 12:35:31 2015 -0400 |
b |
@@ -0,0 +1,7 @@ +alpha 67 331 alpha|CDS1 0 + +alpha 71 326 alpha|CDS2 0 + +alpha 75 336 alpha|CDS3 0 + +beta 68 332 beta|CDS1 0 + +beta 72 327 beta|CDS2 0 + +beta 76 337 beta|CDS3 0 + +beta 333 408 beta|CDS4 0 + |
b |
diff -r 64e67f172188 -r 705a2e2df7fb test-data/get_orf_input.t1_bed_out.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/get_orf_input.t1_bed_out.bed Thu Jul 30 12:35:31 2015 -0400 |
b |
@@ -0,0 +1,3 @@ +alpha 67 331 alpha|CDS1 0 + +alpha 71 326 alpha|CDS2 0 + +alpha 75 336 alpha|CDS3 0 + |
b |
diff -r 64e67f172188 -r 705a2e2df7fb tools/get_orfs_or_cdss/README.rst --- a/tools/get_orfs_or_cdss/README.rst Thu Nov 21 10:47:53 2013 -0500 +++ b/tools/get_orfs_or_cdss/README.rst Thu Jul 30 12:35:31 2015 -0400 |
b |
@@ -1,7 +1,7 @@ Galaxy tool to find ORFs or simple CDSs ======================================= -This tool is copyright 2011-2013 by Peter Cock, The James Hutton Institute +This tool is copyright 2011-2015 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. See the licence text below (MIT licence). @@ -31,21 +31,23 @@ There are just two files to install to use this tool from within Galaxy: -* get_orfs_or_cdss.py (the Python script) -* get_orfs_or_cdss.xml (the Galaxy tool definition) +* ``get_orfs_or_cdss.py`` (the Python script) +* ``get_orfs_or_cdss.xml`` (the Galaxy tool definition) -The suggested location is in a dedicated tools/get_orfs_or_cdss folder. +The suggested location is in a dedicated ``tools/get_orfs_or_cdss`` folder. -You will also need to modify the tools_conf.xml file to tell Galaxy to offer the +You will also need to modify the ``tools_conf.xml`` file to tell Galaxy to offer the tool. One suggested location is in the filters section. Simply add the line:: <tool file="get_orfs_or_cdss/get_orfs_or_cdss.xml" /> -You will also need to install Biopython 1.54 or later. If you want to run -the unit tests, include this line in tools_conf.xml.sample and the sample -FASTA files under the test-data directory. Then:: +You will also need to install Biopython 1.65 or later (slightly older versions +should be fine, but will not have the latest NCBI genetic code tables). - ./run_functional_tests.sh -id get_orfs_or_cdss +If you wish to run the unit tests, also move/copy the ``test-data/`` files +under Galaxy's ``test-data/`` folder. Then:: + + ./run_tests.sh -id get_orfs_or_cdss That's it. @@ -68,6 +70,11 @@ - Updated citation information (Cock et al. 2013). - Renamed folder and adopted README.rst naming. v0.0.6 - Corrected automated dependency definition. +v0.0.7 - Tool definition now embeds citation information. +v0.1.0 - Tool now outputs BED formatted calls (by @erasche, Eric Rasche). 
+ - Using ``optparse`` for the Python command line API (Eric Rasche). + - Added NCBI genetic code table 24, Pterobranchia Mitochondrial. +v0.1.1 - Reorder XML elements (internal change only). ======= ====================================================================== @@ -80,28 +87,41 @@ Development has now moved to a dedicated GitHub repository: https://github.com/peterjc/pico_galaxy/tree/master/tools -For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use -the following command from the Galaxy root folder:: +For pushing a release to the test or main "Galaxy Tool Shed", use the following +Planemo commands (which require that you have set your Tool Shed access details in +``~/.planemo.yml`` and that you have access rights on the Tool Shed):: - $ tar -czf get_orfs_or_cdss.tar.gz tools/get_orfs_or_cdss/README.rst tools/get_orfs_or_cdss/get_orfs_or_cdss.* tools/get_orfs_or_cdss/tool_dependencies.xml test-data/get_orf_input*.fasta test-data/Ssuis.fasta + $ planemo shed_update --shed_target testtoolshed --check_diff ~/repositories/pico_galaxy/tools/get_orfs_or_cdss/ + ... -Check this worked:: +or:: - $ tar -tzf get_orfs_or_cdss.tar.gz - tools/get_orfs_or_cdss/README.rst - tools/get_orfs_or_cdss/get_orfs_or_cdss.py - tools/get_orfs_or_cdss/get_orfs_or_cdss.xml - tools/get_orfs_or_cdss/tool_dependencies.xml - test-data/get_orf_input.fasta + $ planemo shed_update --shed_target toolshed --check_diff ~/repositories/pico_galaxy/tools/get_orfs_or_cdss/ + ... + +To just build and check the tar ball, use:: + + $ planemo shed_upload --tar_only ~/repositories/pico_galaxy/tools/get_orfs_or_cdss/ + ... 
+ $ tar -tzf shed_upload.tar.gz + test-data/Ssuis.fasta + test-data/get_orf_input.Suis_ORF.bed test-data/get_orf_input.Suis_ORF.nuc.fasta test-data/get_orf_input.Suis_ORF.prot.fasta + test-data/get_orf_input.fasta + test-data/get_orf_input.t11_bed_out.bed test-data/get_orf_input.t11_nuc_out.fasta + test-data/get_orf_input.t11_open_bed_out.bed test-data/get_orf_input.t11_open_nuc_out.fasta test-data/get_orf_input.t11_open_prot_out.fasta test-data/get_orf_input.t11_prot_out.fasta + test-data/get_orf_input.t1_bed_out.bed test-data/get_orf_input.t1_nuc_out.fasta test-data/get_orf_input.t1_prot_out.fasta - test-data/Ssuis.fasta + tools/get_orfs_or_cdss/get_orfs_or_cdss.py + tools/get_orfs_or_cdss/get_orfs_or_cdss.xml + tools/get_orfs_or_cdss/README.rst + tools/get_orfs_or_cdss/tool_dependencies.xml Licence (MIT) |
b |
diff -r 64e67f172188 -r 705a2e2df7fb tools/get_orfs_or_cdss/get_orfs_or_cdss.py --- a/tools/get_orfs_or_cdss/get_orfs_or_cdss.py Thu Nov 21 10:47:53 2013 -0500 +++ b/tools/get_orfs_or_cdss/get_orfs_or_cdss.py Thu Jul 30 12:35:31 2015 -0400 |
[ |
b'@@ -1,12 +1,8 @@\n #!/usr/bin/env python\n """Find ORFs in a nucleotide sequence file.\n \n-get_orfs_or_cdss.py $input_fasta $input_format $table $ftype $ends $mode $min_len $strand $out_nuc_file $out_prot_file\n-\n-Takes ten command line options, input sequence filename, format, genetic\n-code, CDS vs ORF, end type (open, closed), selection mode (all, top, one),\n-minimum length (in amino acids), strand (both, forward, reverse), output\n-nucleotide filename, and output protein filename.\n+For more details, see the help text and argument descriptions in the\n+accompanying get_orfs_or_cdss.xml file which defines a Galaxy interface.\n \n This tool is a short Python script which requires Biopython. If you use\n this tool in scientific work leading to a publication, please cite the\n@@ -19,67 +15,90 @@\n This script is copyright 2011-2013 by Peter Cock, The James Hutton Institute\n (formerly SCRI), Dundee, UK. All rights reserved.\n \n-See accompanying text file for licence details (MIT/BSD style).\n+See accompanying text file for licence details (MIT licence).\n \n-This is version 0.0.3 of the script.\n+This is version 0.1.0 of the script.\n """\n import sys\n import re\n+from optparse import OptionParser\n \n-if "-v" in sys.argv or "--version" in sys.argv:\n- print "v0.0.3"\n- sys.exit(0)\n-\n-def stop_err(msg, err=1):\n+def sys_exit(msg, err=1):\n sys.stderr.write(msg.rstrip() + "\\n")\n sys.exit(err)\n \n+usage = """Use as follows:\n+\n+$ python get_orfs_or_cdss.py -i genome.fa -f fasta --table 11 -t CDS -e open -m all -s both --on cds.nuc.fa --op cds.protein.fa --ob cds.bed\n+"""\n+\n try:\n from Bio.Seq import Seq, reverse_complement, translate\n from Bio.SeqRecord import SeqRecord\n from Bio import SeqIO\n from Bio.Data import CodonTable\n except ImportError:\n- stop_err("Missing Biopython library")\n+ sys_exit("Missing Biopython library")\n+\n \n-#Parse Command Line\n-try:\n- input_file, seq_format, table, ftype, ends, mode, min_len, strand, out_nuc_file, 
out_prot_file = sys.argv[1:]\n-except ValueError:\n- stop_err("Expected ten arguments, got %i:\\n%s" % (len(sys.argv)-1, " ".join(sys.argv)))\n+parser = OptionParser(usage=usage)\n+parser.add_option(\'-i\', \'--input\', dest=\'input_file\',\n+ default=None, help=\'Input fasta file\',\n+ metavar=\'FILE\')\n+parser.add_option(\'-f\', \'--format\', dest=\'seq_format\',\n+ default=\'fasta\', help=\'Sequence format (e.g. fasta, fastq, sff)\')\n+parser.add_option(\'--table\', dest=\'table\',\n+ default=1, help=\'NCBI Translation table\', type=\'int\')\n+parser.add_option(\'-t\', \'--ftype\', dest=\'ftype\', type=\'choice\',\n+ choices=[\'CDS\', \'ORF\'], default=\'ORF\',\n+ help=\'Find ORF or CDSs\')\n+parser.add_option(\'-e\', \'--ends\', dest=\'ends\', type=\'choice\',\n+ choices=[\'open\', \'closed\'], default=\'closed\',\n+ help=\'Open or closed. Closed ensures start/stop codons are present\')\n+parser.add_option(\'-m\', \'--mode\', dest=\'mode\', type=\'choice\',\n+ choices=[\'all\', \'top\', \'one\'], default=\'all\',\n+ help=\'Output all ORFs/CDSs from sequence, all ORFs/CDSs \'\n+ \'with max length, or first with maximum length\')\n+parser.add_option(\'--min_len\', dest=\'min_len\',\n+ default=10, help=\'Minimum ORF/CDS length\', type=\'int\')\n+parser.add_option(\'-s\', \'--strand\', dest=\'strand\', type=\'choice\',\n+ choices=[\'forward\', \'reverse\', \'both\'], default=\'both\',\n+ help=\'Strand to search for features on\')\n+parser.add_option(\'--on\', dest=\'out_nuc_file\',\n+ default=None, help=\'Output nucleotide sequences, or - for STDOUT\',\n+ metavar=\'FILE\')\n+parser.add_option(\'--op\', dest=\'out_prot_file\',\n+ default=None, help=\'Output protein sequences, or - for STDOUT\',\n+ metavar=\'FILE\')\n+parser.add_option(\'--ob\', dest=\'out_bed_file\',\n+ default=None, help=\'Output BED file, or - for STDOUT\',\n'..b's.ftype=="CDS":\n offset, n, t = start_chop_and_trans(n)\n else:\n offset = 0\n- t = translate(n, table, to_stop=True)\n- if n and 
len(t) >= min_len:\n+ t = translate(n, options.table, to_stop=True)\n+ if n and len(t) >= options.min_len:\n yield start + offset, n, t\n start = index\n- if ends == "open":\n+ if options.ends == "open":\n #No stop codon, Biopython\'s strict CDS translate will fail\n n = s[start:]\n #Ensure we have whole codons\n@@ -135,14 +154,14 @@\n n = n[:-1]\n if len(n) % 3:\n n = n[:-1]\n- if ftype=="CDS":\n+ if options.ftype=="CDS":\n offset, n, t = start_chop_and_trans(n, strict=False)\n else:\n offset = 0\n- t = translate(n, table, to_stop=True)\n- if n and len(t) >= min_len:\n+ t = translate(n, options.table, to_stop=True)\n+ if n and len(t) >= options.min_len:\n yield start + offset, n, t\n- \n+\n \n def get_all_peptides(nuc_seq):\n """Returns start, end, strand, nucleotides, protein.\n@@ -153,12 +172,12 @@\n #rather than making a list and sorting?\n answer = []\n full_len = len(nuc_seq)\n- if strand != "reverse":\n+ if options.strand != "reverse":\n for frame in range(0,3):\n for offset, n, t in break_up_frame(nuc_seq[frame:]):\n start = frame + offset #zero based\n answer.append((start, start + len(n), +1, n, t))\n- if strand != "forward":\n+ if options.strand != "forward":\n rc = reverse_complement(nuc_seq)\n for frame in range(0,3) :\n for offset, n, t in break_up_frame(rc[frame:]):\n@@ -184,24 +203,31 @@\n raise StopIteration\n yield values[0]\n \n-if mode == "all":\n+if options.mode == "all":\n get_peptides = get_all_peptides\n-elif mode == "top":\n+elif options.mode == "top":\n get_peptides = get_top_peptides\n-elif mode == "one":\n+elif options.mode == "one":\n get_peptides = get_one_peptide\n \n in_count = 0\n out_count = 0\n-if out_nuc_file == "-":\n+if options.out_nuc_file == "-":\n out_nuc = sys.stdout\n else:\n- out_nuc = open(out_nuc_file, "w")\n-if out_prot_file == "-":\n+ out_nuc = open(options.out_nuc_file, "w")\n+\n+if options.out_prot_file == "-":\n out_prot = sys.stdout\n else:\n- out_prot = open(out_prot_file, "w")\n-for record in 
SeqIO.parse(input_file, seq_format):\n+ out_prot = open(options.out_prot_file, "w")\n+\n+if options.out_bed_file == "-":\n+ out_bed = sys.stdout\n+else:\n+ out_bed = open(options.out_bed_file, "w")\n+\n+for record in SeqIO.parse(options.input_file, seq_format):\n for i, (f_start, f_end, f_strand, n, t) in enumerate(get_peptides(str(record.seq).upper())):\n out_count += 1\n if f_strand == +1:\n@@ -210,14 +236,18 @@\n loc = "complement(%i..%i)" % (f_start+1, f_end)\n descr = "length %i aa, %i bp, from %s of %s" \\\n % (len(t), len(n), loc, record.description)\n- r = SeqRecord(Seq(n), id = record.id + "|%s%i" % (ftype, i+1), name = "", description= descr)\n- t = SeqRecord(Seq(t), id = record.id + "|%s%i" % (ftype, i+1), name = "", description= descr)\n+ fid = record.id + "|%s%i" % (options.ftype, i+1)\n+ r = SeqRecord(Seq(n), id = fid, name = "", description= descr)\n+ t = SeqRecord(Seq(t), id = fid, name = "", description= descr)\n SeqIO.write(r, out_nuc, "fasta")\n SeqIO.write(t, out_prot, "fasta")\n+ out_bed.write(\'\\t\'.join(map(str,[record.id, f_start, f_end, fid, 0, \'+\' if f_strand == +1 else \'-\'])) + \'\\n\')\n in_count += 1\n if out_nuc is not sys.stdout:\n out_nuc.close()\n if out_prot is not sys.stdout:\n out_prot.close()\n+if out_bed is not sys.stdout:\n+ out_bed.close()\n \n-print "Found %i %ss in %i sequences" % (out_count, ftype, in_count)\n+print "Found %i %ss in %i sequences" % (out_count, options.ftype, in_count)\n' |
b |
diff -r 64e67f172188 -r 705a2e2df7fb tools/get_orfs_or_cdss/get_orfs_or_cdss.xml --- a/tools/get_orfs_or_cdss/get_orfs_or_cdss.xml Thu Nov 21 10:47:53 2013 -0500 +++ b/tools/get_orfs_or_cdss/get_orfs_or_cdss.xml Thu Jul 30 12:35:31 2015 -0400 |
b |
@@ -1,18 +1,18 @@ -<tool id="get_orfs_or_cdss" name="Get open reading frames (ORFs) or coding sequences (CDSs)" version="0.0.5"> +<tool id="get_orfs_or_cdss" name="Get open reading frames (ORFs) or coding sequences (CDSs)" version="0.1.1"> <description>e.g. to get peptides from ESTs</description> <requirements> - <requirement type="package" version="1.62">biopython</requirement> + <requirement type="package" version="1.65">biopython</requirement> <requirement type="python-module">Bio</requirement> </requirements> - <version_command interpreter="python">get_orfs_or_cdss.py --version</version_command> - <command interpreter="python"> -get_orfs_or_cdss.py $input_file $input_file.ext $table $ftype $ends $mode $min_len $strand $out_nuc_file $out_prot_file - </command> <stdio> <!-- Anything other than zero is an error --> <exit_code range="1:" /> <exit_code range=":-1" /> </stdio> + <version_command interpreter="python">get_orfs_or_cdss.py --version</version_command> + <command interpreter="python"> +get_orfs_or_cdss.py -i $input_file -f $input_file.ext --table $table -t $ftype -e $ends -m $mode --min_len $min_len -s $strand --on $out_nuc_file --op $out_prot_file --ob $out_bed_file + </command> <inputs> <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file (nucleotides)" help="FASTA, FASTQ, or SFF format." /> <param name="table" type="select" label="Genetic code" help="Tables from the NCBI, these determine the start and stop codons"> @@ -33,6 +33,7 @@ <option value="21">21. Trematode Mitochondrial</option> <option value="22">22. Scenedesmus obliquus</option> <option value="23">23. Thraustochytrium Mitochondrial</option> + <option value="24">24. 
Pterobranchia Mitochondrial</option> </param> <param name="ftype" type="select" value="True" label="Look for ORFs or CDSs"> <option value="ORF">Look for ORFs (check for stop codons only, ignore start codons)</option> @@ -49,7 +50,7 @@ <option value="one">First ORF/CDS from each sequence with the maximum length</option> </param> <param name="min_len" type="integer" size="5" value="30" label="Minimum length ORF/CDS (in amino acids, e.g. 30 aa = 90 bp plus any stop codon)" /> - <param name="strand" type="select" label="Strand to search" help="Use the forward only option if your sequence directionality is known (e.g. from poly-A tails, or strand specific RNA sequencing."> + <param name="strand" type="select" label="Strand to search" help="Use the forward only option if your sequence directionality is known (e.g. from poly-A tails, or strand specific RNA sequencing)."> <option value="both">Search both the forward and reverse strand</option> <option value="forward">Only search the forward strand</option> <option value="reverse">Only search the reverse strand</option> @@ -58,6 +59,7 @@ <outputs> <data name="out_nuc_file" format="fasta" label="${ftype.value}s (nucleotides)" /> <data name="out_prot_file" format="fasta" label="${ftype.value}s (amino acids)" /> + <data name="out_bed_file" format="bed6" label="${ftype.value}s (bed)" /> </outputs> <tests> <test> @@ -70,6 +72,7 @@ <param name="strand" value="forward" /> <output name="out_nuc_file" file="get_orf_input.t1_nuc_out.fasta" /> <output name="out_prot_file" file="get_orf_input.t1_prot_out.fasta" /> + <output name="out_bed_file" file="get_orf_input.t1_bed_out.bed" /> </test> <test> <param name="input_file" value="get_orf_input.fasta" /> @@ -80,7 +83,8 @@ <param name="min_len" value="10" /> <param name="strand" value="forward" /> <output name="out_nuc_file" file="get_orf_input.t11_nuc_out.fasta" /> - <output name="out_prot_file" file="get_orf_input.t11_prot_out.fasta" /> + <output name="out_prot_file" 
file="get_orf_input.t11_prot_out.fasta" /> + <output name="out_bed_file" file="get_orf_input.t11_bed_out.bed" /> </test> <test> <param name="input_file" value="get_orf_input.fasta" /> @@ -92,6 +96,7 @@ <param name="strand" value="forward" /> <output name="out_nuc_file" file="get_orf_input.t11_open_nuc_out.fasta" /> <output name="out_prot_file" file="get_orf_input.t11_open_prot_out.fasta" /> + <output name="out_bed_file" file="get_orf_input.t11_open_bed_out.bed" /> </test> <test> <param name="input_file" value="Ssuis.fasta" /> @@ -103,6 +108,7 @@ <param name="strand" value="both" /> <output name="out_nuc_file" file="get_orf_input.Suis_ORF.nuc.fasta" /> <output name="out_prot_file" file="get_orf_input.Suis_ORF.prot.fasta" /> + <output name="out_bed_file" file="get_orf_input.Suis_ORF.bed" /> </test> </tests> <help> @@ -134,7 +140,7 @@ potential start codon will be used, giving the longest possible CDS within each ORF, and thus the longest possible protein sequence. This is useful for things like BLAST or domain searching, but since this may not be the -correct start codon may not be appropriate for signal peptide detection +correct start codon, it may not be appropriate for signal peptide detection etc. **Example Usage** @@ -145,7 +151,7 @@ encode one protein as a single ORF/CDS, which you wish to extract (and perhaps translate into amino acids). -If your RNS-Seq data was strand specific, and assembled taking this into +If your RNA-Seq data was strand specific, and assembled taking this into account, you should only search for ORFs/CDSs on the forward strand. **Citation** @@ -168,4 +174,8 @@ This tool is available to install into other Galaxy Instances via the Galaxy Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/get_orfs_or_cdss </help> + <citations> + <citation type="doi">10.7717/peerj.167</citation> + <citation type="doi">10.1093/bioinformatics/btp163</citation> + </citations> </tool> |
b |
diff -r 64e67f172188 -r 705a2e2df7fb tools/get_orfs_or_cdss/tool_dependencies.xml --- a/tools/get_orfs_or_cdss/tool_dependencies.xml Thu Nov 21 10:47:53 2013 -0500 +++ b/tools/get_orfs_or_cdss/tool_dependencies.xml Thu Jul 30 12:35:31 2015 -0400 |
b |
@@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> - <package name="biopython" version="1.62"> - <repository changeset_revision="3e82cbc44886" name="package_biopython_1_62" owner="biopython" toolshed="http://toolshed.g2.bx.psu.edu" /> + <package name="biopython" version="1.65"> + <repository changeset_revision="dc595937617c" name="package_biopython_1_65" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency> |