Mercurial > repos > davidmurphy > codonlogo
changeset 9:f3462128e87c
Minor alterations to the Galaxy interface, with some better examples and error messages added.
line wrap: on
line diff
--- a/Codonlogo.xml Fri Jan 20 09:03:40 2012 -0500 +++ b/Codonlogo.xml Mon Jan 30 08:17:57 2012 -0500 @@ -1,6 +1,6 @@ <tool id="codonlogo" name="Codon Logo" version="3"> <description>generator for fasta (eg Clustal alignments)</description> - <command interpreter="/usr/tmp/bin/python2.7 -W ignore::DeprecationWarning"> + <command interpreter="python -W ignore::DeprecationWarning"> codonlogo -F $outformat -s $size -f $input -o $output -t "$logoname" -m $frame -n $stacks -X $showxaxis --show-yaxis $showyaxis --errorbars $errorbars -G $strict --fineprint "$fineprint" --stack-width $stackwidth --stack-height $stackheight --box $box --resolution $resolution --scale-width $scalewidth #if str($ylabel) != '' @@ -24,13 +24,13 @@ -R $compfile #end if #if str($comp.mode) == 'Escherichiacoli' - -R Escherichiacoli.txt + --comp escherichiacoli #end if #if str($comp.mode) == 'Saccharomycescerevisiae' - -R Saccharomycescerevisiae.txt + --comp saccharomycescerevisiae #end if #if str($comp.mode) == 'Homosapiens' - -R Homosapiens.txt + --comp homosapiens #end if #if $colours.colour == 'part' @@ -404,13 +404,12 @@ </conditional> <conditional name="comp"> <param name="mode" type="select" label="Select expected composition: 'equiprobable', 'none' (no small sample correction), or supply a file"> - - <option value="equiprobable" >equiprobable</option> - <option value="none" >none</option> -<!-- <option value="Escherichiacoli" >Escherichiacoli</option> - <option value="Homosapiens" >Homosapiens</option> - <option value="Saccharomycescerevisiae" >Saccharomycescerevisiae</option>--> - <option value="file">from file</option> + <option value="none" >No small sample correction</option> + <option value="equiprobable" >Equiprobable</option> + <option value="Homosapiens" >Human</option> + <option value="Yeast" >Saccharomycescerevisiae</option> + <option value="E.Coli" >Escherichiacoli</option> + <option value="From File">from file</option> </param> <when value="auto">
--- a/README.txt Fri Jan 20 09:03:40 2012 -0500 +++ b/README.txt Mon Jan 30 08:17:57 2012 -0500 @@ -1,51 +1,47 @@ - -CodonLogo (http://recode.ucc.ie/CodonLogo) is a tool for creating sequence -logos from biological sequence alignments. It can be run from the command line as a standalone webserver or as a CGI webapp. - - -For help on the command line interface run - ./codonlogo --help - -To build a simple logo run - ./codonlogo < cap.fa > logo.eps - - -To run as a standalone webserver at localhost:8080 - ./codonlogo --server - - -An example file of probabilities is included, examplepriorfile.txt -It can be used with the following command. - - ./codonlogo --prior ./examples/Escherichiacoli.txt < cap.fa > logo.eps - - -examplepriorfile contains the frequencies for codons in human CDS regions. - - -KNOWN ISSUES: - -There is a known issue with GPL Ghostscript 9.04 which affects some users which may cause ghostscript to segfault or prevent codonlogos from being generated in anything other than eps format. - -This is believed to be an issue with ghostscript and a bug report has been submitted to the ghostscript mailing list. -This is being investigated. if you encounter this problem it's recommended to downgrade to version 9.01 of ghostscript or earlier . - - - -For converting files to a suitable format the following sites can be used: - + +CodonLogo (http://recode.ucc.ie/CodonLogo) is a tool for creating sequence +logos from biological sequence alignments. It can be run on the command line, +as a standalone webserver or as a CGI webapp. + + +For help on the command line interface run + ./codonlogo --help + +To build a simple logo run + ./codonlogo < cap.fa > logo.eps + + +To run as a standalone webserver at localhost:8080 + ./codonlogo --server + + +An example file of probabilities is included, examplepriorfile.txt +It can be used with the following command. 
+ + ./codonlogo --prior examplepriorfile.txt < cap.fa > logo.eps + + +examplepriorfile contains the frequencies for codons in human CDS regions. + + +There is a known issue with GPL Ghostscript 9.04 that affects some users and may cause Ghostscript to segfault. +This is not believed to be a problem with CodonLogo. +This is being investigated. If you encounter this problem, it is recommended to downgrade to Ghostscript version 9.01 or earlier. + +For converting files to a suitable format the following sites can be used: + http://genome.nci.nih.gov/tools/reformat.html http://www-bimas.cit.nih.gov/molbio/readseq/ - - - - --- Distribution and Modification -- -This package is distributed under the new BSD Open Source License. -Please see the LICENSE.txt file for details on copyright and licensing. -The CodonLogo source code can be downloaded from -http://recode.ucc.ie/CodonLogo - -CodonLogo requires Python 2.6 or 2.7, the corebio python toolkit for -computational biology (http://code.google.com/p/corebio), and the python -array package 'numpy' (http://www.scipy.org/Download) + + + + +-- Distribution and Modification -- +This package is distributed under the new BSD Open Source License. +Please see the LICENSE.txt file for details on copyright and licensing. +The CodonLogo source code can be downloaded from +http://recode.ucc.ie/CodonLogo + +CodonLogo requires Python 2.6 or 2.7, the corebio python toolkit for +computational biology (http://code.google.com/p/corebio), and the python +array package 'numpy' (http://www.scipy.org/Download)
--- a/README.txt~ Fri Jan 20 09:03:40 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ - -CodonLogo (http://recode.ucc.ie/CodonLogo) is a tool for creating sequence -logos from biological sequence alignments. It can be run on the command line, -as a standalone webserver or as a CGI webapp. - - -For help on the command line interface run - ./codonlogo --help - -To build a simple logo run - ./codonlogo < cap.fa > logo.eps - - -To run as a standalone webserver at localhost:8080 - ./codonlogo --server - - -An example file of probabilities is included, examplepriorfile.txt -It can be used with the following command. - - ./codonlogo --prior examplepriorfile.txt < cap.fa > logo.eps - - -examplepriorfile contains the frequencies for codons in human CDS regions. - - -There is a known issue with GPL Ghostscript 9.04 which affects some users which may cause ghostscript to segfault. -This is not believed to be a problem with CodonLogo. -This is being investigated. if you encounter this problem it's recommended to downgrade to version 9.01 or earlier of ghostscript. - -For converting files to a suitable format the following sites can be used: - -http://genome.nci.nih.gov/tools/reformat.html -http://www-bimas.cit.nih.gov/molbio/readseq/ - - - - --- Distribution and Modification -- -This package is distributed under the new BSD Open Source License. -Please see the LICENSE.txt file for details on copyright and licensing. -The CodonLogo source code can be downloaded from -http://recode.ucc.ie/CodonLogo - -CodonLogo requires Python 2.6 or 2.7, the corebio python toolkit for -computational biology (http://code.google.com/p/corebio), and the python -array package 'numpy' (http://www.scipy.org/Download)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/examples/Galaxy-Workflow-Figure(1).ga Mon Jan 30 08:17:57 2012 -0500 @@ -0,0 +1,118 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "Generates part B", + "format-version": "0.1", + "name": "Figure", + "steps": { + "0": { + "annotation": "", + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "Input Dataset" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 197, + "top": 384 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"Input Dataset\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "1": { + "annotation": "", + "id": 1, + "input_connections": { + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "Codon Logo", + "outputs": [ + { + "name": "output", + "type": "pdf" + } + ], + "position": { + "left": 580, + "top": 228 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "codonlogo", + "tool_state": "{\"outformat\": \"\\\"pdf\\\"\", \"frame\": \"\\\"0\\\"\", \"size\": \"\\\"large\\\"\", \"__page__\": 0, \"colours\": \"{\\\"colour\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\", \"strict\": \"\\\"False\\\"\", \"logoname\": \"\\\"0 Frame\\\"\", \"ylabel\": \"\\\"\\\"\", \"input\": \"null\", \"stackheight\": \"\\\"100\\\"\", \"scalewidth\": \"\\\"True\\\"\", \"showyaxis\": \"\\\"True\\\"\", \"comp\": \"{\\\"mode\\\": \\\"equiprobable\\\", \\\"__current_case__\\\": 1}\", \"showxaxis\": \"\\\"True\\\"\", \"stackwidth\": \"\\\"40.0\\\"\", \"box\": \"\\\"False\\\"\", \"fineprint\": \"\\\"\\\"\", \"range\": \"{\\\"seqend\\\": \\\"16\\\", \\\"mode\\\": \\\"part\\\", \\\"__current_case__\\\": 1, \\\"seqstart\\\": \\\"6\\\"}\", \"xlabel\": \"\\\"\\\"\", \"errorbars\": \"\\\"True\\\"\", \"resolution\": \"\\\"96\\\"\", \"stacks\": \"\\\"20\\\"\"}", + "tool_version": "3", + "type": "tool", + "user_outputs": [] + }, + "2": { + "annotation": "", + "id": 2, + 
"input_connections": { + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "Codon Logo", + "outputs": [ + { + "name": "output", + "type": "pdf" + } + ], + "position": { + "left": 580, + "top": 360 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "codonlogo", + "tool_state": "{\"outformat\": \"\\\"pdf\\\"\", \"frame\": \"\\\"1\\\"\", \"size\": \"\\\"large\\\"\", \"__page__\": 0, \"colours\": \"{\\\"colour\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\", \"strict\": \"\\\"False\\\"\", \"logoname\": \"\\\"+1 Frame\\\"\", \"ylabel\": \"\\\"\\\"\", \"input\": \"null\", \"stackheight\": \"\\\"100\\\"\", \"scalewidth\": \"\\\"True\\\"\", \"showyaxis\": \"\\\"True\\\"\", \"comp\": \"{\\\"mode\\\": \\\"equiprobable\\\", \\\"__current_case__\\\": 1}\", \"showxaxis\": \"\\\"True\\\"\", \"stackwidth\": \"\\\"40.0\\\"\", \"box\": \"\\\"False\\\"\", \"fineprint\": \"\\\"\\\"\", \"range\": \"{\\\"seqend\\\": \\\"16\\\", \\\"mode\\\": \\\"part\\\", \\\"__current_case__\\\": 1, \\\"seqstart\\\": \\\"6\\\"}\", \"xlabel\": \"\\\"\\\"\", \"errorbars\": \"\\\"True\\\"\", \"resolution\": \"\\\"96\\\"\", \"stacks\": \"\\\"20\\\"\"}", + "tool_version": "3", + "type": "tool", + "user_outputs": [] + }, + "3": { + "annotation": "", + "id": 3, + "input_connections": { + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "Codon Logo", + "outputs": [ + { + "name": "output", + "type": "pdf" + } + ], + "position": { + "left": 580, + "top": 493 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "codonlogo", + "tool_state": "{\"outformat\": \"\\\"pdf\\\"\", \"frame\": \"\\\"2\\\"\", \"size\": \"\\\"large\\\"\", \"__page__\": 0, \"colours\": \"{\\\"colour\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\", \"strict\": \"\\\"False\\\"\", \"logoname\": \"\\\"+2 Frame\\\"\", \"ylabel\": \"\\\"\\\"\", \"input\": \"null\", \"stackheight\": \"\\\"100\\\"\", \"scalewidth\": \"\\\"True\\\"\", \"showyaxis\": 
\"\\\"True\\\"\", \"comp\": \"{\\\"mode\\\": \\\"equiprobable\\\", \\\"__current_case__\\\": 1}\", \"showxaxis\": \"\\\"True\\\"\", \"stackwidth\": \"\\\"40.0\\\"\", \"box\": \"\\\"False\\\"\", \"fineprint\": \"\\\"\\\"\", \"range\": \"{\\\"seqend\\\": \\\"16\\\", \\\"mode\\\": \\\"part\\\", \\\"__current_case__\\\": 1, \\\"seqstart\\\": \\\"6\\\"}\", \"xlabel\": \"\\\"\\\"\", \"errorbars\": \"\\\"True\\\"\", \"resolution\": \"\\\"96\\\"\", \"stacks\": \"\\\"20\\\"\"}", + "tool_version": "3", + "type": "tool", + "user_outputs": [] + } + } +} \ No newline at end of file
--- a/examples/Galaxy-Workflow-Figure.ga Fri Jan 20 09:03:40 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,118 +0,0 @@ -{ - "a_galaxy_workflow": "true", - "annotation": "Generates part B", - "format-version": "0.1", - "name": "Figure", - "steps": { - "0": { - "annotation": "", - "id": 0, - "input_connections": {}, - "inputs": [ - { - "description": "", - "name": "Input Dataset" - } - ], - "name": "Input dataset", - "outputs": [], - "position": { - "left": 197, - "top": 384 - }, - "tool_errors": null, - "tool_id": null, - "tool_state": "{\"name\": \"Input Dataset\"}", - "tool_version": null, - "type": "data_input", - "user_outputs": [] - }, - "1": { - "annotation": "", - "id": 1, - "input_connections": { - "input": { - "id": 0, - "output_name": "output" - } - }, - "inputs": [], - "name": "Codon Logo", - "outputs": [ - { - "name": "output", - "type": "pdf" - } - ], - "position": { - "left": 580, - "top": 228 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "codonlogo", - "tool_state": "{\"outformat\": \"\\\"pdf\\\"\", \"frame\": \"\\\"0\\\"\", \"size\": \"\\\"large\\\"\", \"__page__\": 0, \"colours\": \"{\\\"colour\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\", \"strict\": \"\\\"False\\\"\", \"logoname\": \"\\\"0 Frame\\\"\", \"ylabel\": \"\\\"\\\"\", \"input\": \"null\", \"stackheight\": \"\\\"100\\\"\", \"scalewidth\": \"\\\"True\\\"\", \"showyaxis\": \"\\\"True\\\"\", \"comp\": \"{\\\"mode\\\": \\\"equiprobable\\\", \\\"__current_case__\\\": 1}\", \"showxaxis\": \"\\\"True\\\"\", \"stackwidth\": \"\\\"40.0\\\"\", \"box\": \"\\\"False\\\"\", \"fineprint\": \"\\\"\\\"\", \"range\": \"{\\\"seqend\\\": \\\"16\\\", \\\"mode\\\": \\\"part\\\", \\\"__current_case__\\\": 1, \\\"seqstart\\\": \\\"6\\\"}\", \"xlabel\": \"\\\"\\\"\", \"errorbars\": \"\\\"True\\\"\", \"resolution\": \"\\\"96\\\"\", \"stacks\": \"\\\"20\\\"\"}", - "tool_version": "3", - "type": "tool", - "user_outputs": [] - }, - "2": { - "annotation": "", - "id": 2, - 
"input_connections": { - "input": { - "id": 0, - "output_name": "output" - } - }, - "inputs": [], - "name": "Codon Logo", - "outputs": [ - { - "name": "output", - "type": "pdf" - } - ], - "position": { - "left": 580, - "top": 360 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "codonlogo", - "tool_state": "{\"outformat\": \"\\\"pdf\\\"\", \"frame\": \"\\\"1\\\"\", \"size\": \"\\\"large\\\"\", \"__page__\": 0, \"colours\": \"{\\\"colour\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\", \"strict\": \"\\\"False\\\"\", \"logoname\": \"\\\"+1 Frame\\\"\", \"ylabel\": \"\\\"\\\"\", \"input\": \"null\", \"stackheight\": \"\\\"100\\\"\", \"scalewidth\": \"\\\"True\\\"\", \"showyaxis\": \"\\\"True\\\"\", \"comp\": \"{\\\"mode\\\": \\\"equiprobable\\\", \\\"__current_case__\\\": 1}\", \"showxaxis\": \"\\\"True\\\"\", \"stackwidth\": \"\\\"40.0\\\"\", \"box\": \"\\\"False\\\"\", \"fineprint\": \"\\\"\\\"\", \"range\": \"{\\\"seqend\\\": \\\"16\\\", \\\"mode\\\": \\\"part\\\", \\\"__current_case__\\\": 1, \\\"seqstart\\\": \\\"6\\\"}\", \"xlabel\": \"\\\"\\\"\", \"errorbars\": \"\\\"True\\\"\", \"resolution\": \"\\\"96\\\"\", \"stacks\": \"\\\"20\\\"\"}", - "tool_version": "3", - "type": "tool", - "user_outputs": [] - }, - "3": { - "annotation": "", - "id": 3, - "input_connections": { - "input": { - "id": 0, - "output_name": "output" - } - }, - "inputs": [], - "name": "Codon Logo", - "outputs": [ - { - "name": "output", - "type": "pdf" - } - ], - "position": { - "left": 580, - "top": 493 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "codonlogo", - "tool_state": "{\"outformat\": \"\\\"pdf\\\"\", \"frame\": \"\\\"2\\\"\", \"size\": \"\\\"large\\\"\", \"__page__\": 0, \"colours\": \"{\\\"colour\\\": \\\"no\\\", \\\"__current_case__\\\": 0}\", \"strict\": \"\\\"False\\\"\", \"logoname\": \"\\\"+2 Frame\\\"\", \"ylabel\": \"\\\"\\\"\", \"input\": \"null\", \"stackheight\": \"\\\"100\\\"\", \"scalewidth\": \"\\\"True\\\"\", \"showyaxis\": 
\"\\\"True\\\"\", \"comp\": \"{\\\"mode\\\": \\\"equiprobable\\\", \\\"__current_case__\\\": 1}\", \"showxaxis\": \"\\\"True\\\"\", \"stackwidth\": \"\\\"40.0\\\"\", \"box\": \"\\\"False\\\"\", \"fineprint\": \"\\\"\\\"\", \"range\": \"{\\\"seqend\\\": \\\"16\\\", \\\"mode\\\": \\\"part\\\", \\\"__current_case__\\\": 1, \\\"seqstart\\\": \\\"6\\\"}\", \"xlabel\": \"\\\"\\\"\", \"errorbars\": \"\\\"True\\\"\", \"resolution\": \"\\\"96\\\"\", \"stacks\": \"\\\"20\\\"\"}", - "tool_version": "3", - "type": "tool", - "user_outputs": [] - } - } -} \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/weblogolib/Escherichiacoli.txt Mon Jan 30 08:17:57 2012 -0500 @@ -0,0 +1,64 @@ +UUU 19.7 +UUC 15 +UUA 15.2 +UUG 11.9 +CUU 11.9 +CUC 10.5 +CUA 5.3 +CUG 46.9 +AUU 30.5 +AUC 18.2 +AUA 3.7 +AUG 24.8 +GUU 16.8 +GUC 11.7 +GUA 11.5 +GUG 26.4 +UCU 5.7 +UCC 5.5 +UCA 7.8 +UCG 8 +CCU 8.4 +CCC 6.4 +CCA 6.6 +CCG 26.7 +ACU 8 +ACC 22.8 +ACA 6.4 +ACG 11.5 +GCU 10.7 +GCC 31.6 +GCA 21.1 +GCG 38.5 +UAU 16.8 +UAC 14.6 +UAA 1.8 +UAG 0 +CAU 15.8 +CAC 13.1 +CAA 12.1 +CAG 27.7 +AAU 21.9 +AAC 24.4 +AAA 33.2 +AAG 12.1 +GAU 37.9 +GAC 20.5 +GAA 43.7 +GAG 18.4 +UGU 5.9 +UGC 8 +UGA 1 +UGG 10.7 +CGU 21.1 +CGC 26 +CGA 4.3 +CGG 4.1 +AGU 7.2 +AGC 16.6 +AGA 1.4 +AGG 1.6 +GGU 21.3 +GGC 33.4 +GGA 9.2 +GGG 8.6 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/weblogolib/Homosapiens.txt Mon Jan 30 08:17:57 2012 -0500 @@ -0,0 +1,64 @@ +UUU 17.6 +UUC 20.3 +UUA 7.7 +UUG 12.9 +CUU 13.2 +CUC 19.6 +CUA 7.2 +CUG 39.6 +AUU 16 +AUC 20.8 +AUA 7.5 +AUG 22 +GUU 11 +GUC 14.5 +GUA 7.1 +GUG 28.1 +UCU 15.2 +UCC 17.7 +UCA 12.2 +UCG 4.4 +CCU 17.5 +CCC 19.8 +CCA 16.9 +CCG 6.9 +ACU 13.1 +ACC 18.9 +ACA 15.1 +ACG 6.1 +GCU 18.4 +GCC 27.7 +GCA 15.8 +GCG 7.4 +UAU 12.2 +UAC 15.3 +UAA 1 +UAG 0.8 +CAU 10.9 +CAC 15.1 +CAA 12.3 +CAG 34.2 +AAU 17 +AAC 19.1 +AAA 24.4 +AAG 31.9 +GAU 21.8 +GAC 25.1 +GAA 29 +GAG 39.6 +UGU 10.6 +UGC 12.6 +UGA 1.6 +UGG 13.2 +CGU 4.5 +CGC 10.4 +CGA 6.2 +CGG 11.4 +AGU 12.1 +AGC 19.5 +AGA 12.2 +AGG 12 +GGU 10.8 +GGC 22.2 +GGA 16.5 +GGG 16.5 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/weblogolib/Saccharomycescerevisiae.txt Mon Jan 30 08:17:57 2012 -0500 @@ -0,0 +1,64 @@ +UUU 26.1 +UUC 18.4 +UUA 26.2 +UUG 27.2 +CUU 12.3 +CUC 5.4 +CUA 13.4 +CUG 10.5 +AUU 30.1 +AUC 17.2 +AUA 17.8 +AUG 20.9 +GUU 22.1 +GUC 11.8 +GUA 11.8 +GUG 10.8 +UCU 23.5 +UCC 14.2 +UCA 18.7 +UCG 8.6 +CCU 13.5 +CCC 6.8 +CCA 18.3 +CCG 5.3 +ACU 20.3 +ACC 12.7 +ACA 17.8 +ACG 8 +GCU 21.2 +GCC 12.6 +GCA 16.2 +GCG 6.2 +UAU 18.8 +UAC 14.8 +UAA 1.1 +UAG 0.5 +CAU 13.6 +CAC 7.8 +CAA 27.3 +CAG 12.1 +AAU 35.7 +AAC 24.8 +AAA 41.9 +AAG 30.8 +GAU 37.6 +GAC 20.2 +GAA 45.6 +GAG 19.2 +UGU 8.1 +UGC 4.8 +UGA 0.7 +UGG 10.4 +CGU 6.4 +CGC 2.6 +CGA 3 +CGG 1.7 +AGU 14.2 +AGC 9.8 +AGA 21.3 +AGG 9.2 +GGU 23.9 +GGC 9.8 +GGA 10.9 +GGG 6 \ No newline at end of file
--- a/weblogolib/__init__.py Fri Jan 20 09:03:40 2012 -0500 +++ b/weblogolib/__init__.py Mon Jan 30 08:17:57 2012 -0500 @@ -982,7 +982,12 @@ if comp.lower() == 'equiprobable' : prior = weight * equiprobable_distribution(len(alphabet)) - + elif comp.lower() == 'escherichiacoli' : + composition="{'CTT': 0.7616, 'ATG': 1.5872, 'ACA': 0.4096, 'ACG': 0.736, 'ATC': 1.1648, 'AAC': 1.5615999999999999, 'ATA': 0.2368, 'AGG': 0.1024, 'CCT': 0.5376000000000001, 'ACT': 0.512, 'AGC': 1.0624, 'AAG': 0.7744, 'AGA': 0.0896, 'CAT': 1.0112, 'AAT': 1.4016, 'ATT': 1.952, 'CTG': 3.0016, 'CTA': 0.3392, 'CTC': 0.672, 'CAC': 0.8383999999999999, 'AAA': 2.1248, 'CCG': 1.7087999999999999, 'AGT': 0.4608, 'CCA': 0.4224, 'CAA': 0.7744, 'CCC': 0.4096, 'TAT': 1.0752000000000002, 'GGT': 1.3632, 'TGT': 0.37760000000000005, 'CGA': 0.2752, 'CAG': 1.7728, 'TCT': 0.3648, 'GAT': 2.4255999999999998, 'CGG': 0.26239999999999997, 'TTT': 1.2608, 'TGC': 0.512, 'GGG': 0.5504, 'TAG': 1e-06, 'GGA': 0.5888, 'TAA': 0.1152, 'GGC': 2.1376, 'TAC': 0.9344, 'TTC': 0.96, 'TCG': 0.512, 'TTA': 0.9728, 'TTG': 0.7616, 'TCC': 0.352, 'ACC': 1.4592, 'TCA': 0.4992, 'GCA': 1.3504, 'GTA': 0.736, 'GCC': 2.0224, 'GTC': 0.7487999999999999, 'GCG': 2.464, 'GTG': 1.6896, 'GAG': 1.1776, 'GTT': 1.0752000000000002, 'GCT': 0.6848, 'TGA': 0.064, 'GAC': 1.312, 'CGT': 1.3504, 'TGG': 0.6848, 'GAA': 2.7968, 'CGC': 1.664}" + elif comp.lower() == 'homosapiens' : + composition="{'CTT': 0.8448, 'ATG': 1.408, 'ACA': 0.9663999999999999, 'ACG': 0.39039999999999997, 'ATC': 1.3312, 'AAC': 1.2224000000000002, 'ATA': 0.48, 'AGG': 0.768, 'CCT': 1.12, 'ACT': 0.8383999999999999, 'AGC': 1.248, 'AAG': 2.0416, 'AGA': 0.7807999999999999, 'CAT': 0.6976, 'AAT': 1.088, 'ATT': 1.024, 'CTG': 2.5344, 'CTA': 0.4608, 'CTC': 1.2544000000000002, 'CAC': 0.9663999999999999, 'AAA': 1.5615999999999999, 'CCG': 0.44160000000000005, 'AGT': 0.7744, 'CCA': 1.0816, 'CAA': 0.7872, 'CCC': 1.2672, 'TAT': 0.7807999999999999, 'GGT': 0.6912, 'TGT': 0.6784, 'CGA': 0.3968, 'CAG': 2.1888, 
'TCT': 0.9728, 'GAT': 1.3952, 'CGG': 0.7296, 'TTT': 1.1264, 'TGC': 0.8064, 'GGG': 1.056, 'TAG': 0.0512, 'GGA': 1.056, 'TAA': 0.064, 'GGC': 1.4208, 'TAC': 0.9792000000000001, 'TTC': 1.2992000000000001, 'TCG': 0.2816, 'TTA': 0.4928, 'TTG': 0.8256, 'TCC': 1.1328, 'ACC': 1.2096, 'TCA': 0.7807999999999999, 'GCA': 1.0112, 'GTA': 0.45439999999999997, 'GCC': 1.7728, 'GTC': 0.928, 'GCG': 0.4736, 'GTG': 1.7984, 'GAG': 2.5344, 'GTT': 0.704, 'GCT': 1.1776, 'TGA': 0.1024, 'GAC': 1.6064, 'CGT': 0.288, 'TGG': 0.8448, 'GAA': 1.856, 'CGC': 0.6656}" + elif comp.lower() == 'saccharomycescerevisiae' : + composition="{'CTT': 0.7872, 'ATG': 1.3376, 'ACA': 1.1392, 'ACG': 0.512, 'ATC': 1.1008, 'AAC': 1.5872, 'ATA': 1.1392, 'AGG': 0.5888, 'CCT': 0.864, 'ACT': 1.2992000000000001, 'AGC': 0.6272000000000001, 'AAG': 1.9712, 'AGA': 1.3632, 'CAT': 0.8704, 'AAT': 2.2848, 'ATT': 1.9264000000000001, 'CTG': 0.672, 'CTA': 0.8576, 'CTC': 0.3456, 'CAC': 0.4992, 'AAA': 2.6816, 'CCG': 0.3392, 'AGT': 0.9087999999999999, 'CCA': 1.1712, 'CAA': 1.7472, 'CCC': 0.4352, 'TAT': 1.2032, 'GGT': 1.5295999999999998, 'TGT': 0.5184, 'CGA': 0.192, 'CAG': 0.7744, 'TCT': 1.504, 'GAT': 2.4064, 'CGG': 0.1088, 'TTT': 1.6704, 'TGC': 0.3072, 'GGG': 0.384, 'TAG': 0.032, 'GGA': 0.6976, 'TAA': 0.0704, 'GGC': 0.6272000000000001, 'TAC': 0.9472, 'TTC': 1.1776, 'TCG': 0.5504, 'TTA': 1.6767999999999998, 'TTG': 1.7408, 'TCC': 0.9087999999999999, 'ACC': 0.8128, 'TCA': 1.1967999999999999, 'GCA': 1.0368, 'GTA': 0.7552000000000001, 'GCC': 0.8064, 'GTC': 0.7552000000000001, 'GCG': 0.3968, 'GTG': 0.6912, 'GAG': 1.2288, 'GTT': 1.4144, 'GCT': 1.3568, 'TGA': 0.0448, 'GAC': 1.2928, 'CGT': 0.4096, 'TGG': 0.6656, 'GAA': 2.9184, 'CGC': 0.1664}" elif comp.lower() == 'auto' or comp.lower() == 'automatic': if alphabet == unambiguous_protein_alphabet : prior = weight * asarray(aa_composition, float64) @@ -1256,7 +1261,7 @@ if len(seqs[i][(counter):(counter+3)].strip("GATUC"))==1 or len(seqs[i][(counter):(counter+3)].strip("GATUC"))==2 : print 
>>sys.stderr, 'Warning:Incomplete or non GATUC codon detected:', seqs[i][(counter):(counter+3)] print >>sys.stderr, 'Position:',counter - print >>sys.stderr, 'Sequence:',i + print >>sys.stderr, 'Sequence:',(i+1) print >>sys.stderr, 'This will be treated as ---' @@ -1386,7 +1391,8 @@ priordict[line[0].upper().replace("U", "T")]=(float(line[1])/1000)*64 else: priordict[line[0].upper().replace("T", "U")]=(float(line[1])/1000)*64 - + if priordict[line[0].upper().replace("U", "T")] == 0: + priordict[line[0].upper().replace("U", "T")] = 0.000001 return priordict def _build_logodata(options) : @@ -1632,7 +1638,7 @@ action="store", type="string", default = "auto", - help="The expected composition of the sequences: 'auto' (default), 'equiprobable', 'none' (Do not perform any compositional adjustment), ", + help="The expected composition of the sequences: 'auto' (default), 'equiprobable', 'none' (Do not perform any compositional adjustment), or 'escherichiacoli' 'homosapiens' 'saccharomycescerevisiae' for ecoli, human and SC codon frequencies.", metavar="COMP.") data_grp.add_option( "", "--weight",