# HG changeset patch # User peterjc # Date 1367854155 14400 # Node ID 0ec2a079f68d609f56d456f784131cc79821caf6 Uploaded v0.0.1, includes README file and dependencies. diff -r 000000000000 -r 0ec2a079f68d README.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.txt Mon May 06 11:29:15 2013 -0400 @@ -0,0 +1,25 @@ +This Tool Shed Repository contains a workflow for the identification of candidate secreted proteins from a given protein FASTA file. + +It runs SignalP v3.0 and selects only proteins with a strong predicted signal peptide, and then runs TMHMM v2.0 on those, and selects only proteins without a predicted trans-membrane helix. This workflow was used in Kikuchi et al (2011), and is a simplification of the candidate effector protocol described in Jones et al (2009). + +Kikuchi T, Cotton JA, Dalzell JJ, Hasegawa K, Kanzaki N, et al. (2011) Genomic insights into the origin of parasitism in the emerging plant pathogen Bursaphelenchus xylophilus. PLoS Pathog 7: e1002219. +http://dx.doi.org/10.1371/journal.ppat.1002219 + +Jones JT, Kumar A, Pylypenko LA, Thirugnanasambandam A, Castelli L, et al. (2009) Identification and functional characterization of effectors in expressed sequence tags from various life cycle stages of the potato cyst nematode Globodera pallida. Mol Plant Pathol 10: 815–28. +http://dx.doi.org/10.1111/j.1364-3703.2009.00585.x + +Bendtsen JD, Nielsen H, von Heijne G, Brunak S (2004) Improved prediction of signal peptides: SignalP 3.0. J Mol Biol 340: 783–95. +http://dx.doi.org/10.1016/j.jmb.2004.05.028 + +Krogh A, Larsson B, von Heijne G, Sonnhammer E (2001) Predicting transmembrane protein topology with a hidden Markov model: application to complete genomes. J Mol Biol 305: 567–580. 
+http://dx.doi.org/10.1006/jmbi.2000.4315 + + +Availability +============ + +This workflow is available on the main Galaxy Tool Shed: +http://toolshed.g2.bx.psu.edu/view/peterjc/secreted_protein_workflow + +Development is being done on github here: +https://github.com/peterjc/picobio/tree/master/galaxy_workflows/secreted_protein_workflow diff -r 000000000000 -r 0ec2a079f68d repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Mon May 06 11:29:15 2013 -0400 @@ -0,0 +1,7 @@ + + + + + + + diff -r 000000000000 -r 0ec2a079f68d secreted_protein_workflow.ga --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/secreted_protein_workflow.ga Mon May 06 11:29:15 2013 -0400 @@ -0,0 +1,288 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "Runs SignalP v3.0 and TMHMM v2.0 to look for secreted proteins.", + "format-version": "0.1", + "name": "Find secreted proteins with TMHMM and SignalP", + "steps": { + "0": { + "annotation": "", + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "Input Dataset" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 200, + "top": 200 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"Input Dataset\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "1": { + "annotation": "", + "id": 1, + "input_connections": { + "fasta_file": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SignalP 3.0", + "name": "organism" + } + ], + "name": "SignalP 3.0", + "outputs": [ + { + "name": "tabular_file", + "type": "tabular" + } + ], + "position": { + "left": 240, + "top": 341 + }, + "post_job_actions": { + "HideDatasetActiontabular_file": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "tabular_file" + } + }, + "tool_errors": null, + "tool_id": "signalp3", + "tool_state": "{\"organism\": 
\"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"fasta_file\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"truncate\": \"\\\"60\\\"\", \"__page__\": 0}", + "tool_version": "0.0.8", + "type": "tool", + "user_outputs": [] + }, + "2": { + "annotation": "Select proteins with predicted signal peptide (SignalP NN D-Score or HMM)", + "id": 2, + "input_connections": { + "input": { + "id": 1, + "output_name": "tabular_file" + } + }, + "inputs": [], + "name": "Filter", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 323, + "top": 528 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + }, + "RenameDatasetActionout_file1": { + "action_arguments": { + "newname": "Filtered SignalP results" + }, + "action_type": "RenameDatasetAction", + "output_name": "out_file1" + } + }, + "tool_errors": null, + "tool_id": "Filter1", + "tool_state": "{\"__page__\": 0, \"cond\": \"\\\"c14=='Y' or c15=='S'\\\"\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"input\": \"null\"}", + "tool_version": "1.1.0", + "type": "tool", + "user_outputs": [] + }, + "3": { + "annotation": "Select those sequences with signal peptides.", + "id": 3, + "input_connections": { + "input_file": { + "id": 0, + "output_name": "output" + }, + "input_tabular": { + "id": 2, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "Filter sequences by ID", + "outputs": [ + { + "name": "output_pos", + "type": "fasta" + }, + { + "name": "output_neg", + "type": "fasta" + } + ], + "position": { + "left": 527, + "top": 200 + }, + "post_job_actions": { + "HideDatasetActionoutput_neg": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_neg" + }, + "HideDatasetActionoutput_pos": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + 
"output_name": "output_pos" + } + }, + "tool_errors": null, + "tool_id": "seq_filter_by_id", + "tool_state": "{\"__page__\": 0, \"output_choice_cond\": \"{\\\"output_choice\\\": \\\"pos\\\", \\\"__current_case__\\\": 1}\", \"input_file\": \"null\", \"input_tabular\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"columns\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"1\\\"]}\"}", + "tool_version": "0.0.1", + "type": "tool", + "user_outputs": [] + }, + "4": { + "annotation": "", + "id": 4, + "input_connections": { + "fasta_file": { + "id": 3, + "output_name": "output_pos" + } + }, + "inputs": [], + "name": "TMHMM 2.0", + "outputs": [ + { + "name": "tabular_file", + "type": "tabular" + } + ], + "position": { + "left": 643, + "top": 443 + }, + "post_job_actions": { + "HideDatasetActiontabular_file": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "tabular_file" + } + }, + "tool_errors": null, + "tool_id": "tmhmm2", + "tool_state": "{\"__page__\": 0, \"fasta_file\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "0.0.7", + "type": "tool", + "user_outputs": [] + }, + "5": { + "annotation": "Select proteins with no predicted transmembrane helices.", + "id": 5, + "input_connections": { + "input": { + "id": 4, + "output_name": "tabular_file" + } + }, + "inputs": [], + "name": "Filter", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 729, + "top": 566 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + }, + "RenameDatasetActionout_file1": { + "action_arguments": { + "newname": "Filtered TMHMM results" + }, + "action_type": "RenameDatasetAction", + "output_name": "out_file1" + } + }, + "tool_errors": null, + "tool_id": "Filter1", + "tool_state": "{\"__page__\": 
0, \"cond\": \"\\\"c5== 0\\\"\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"input\": \"null\"}", + "tool_version": "1.1.0", + "type": "tool", + "user_outputs": [] + }, + "6": { + "annotation": "Select those sequences with no transmembrane helices (from those with signal peptides).", + "id": 6, + "input_connections": { + "input_file": { + "id": 3, + "output_name": "output_pos" + }, + "input_tabular": { + "id": 5, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "Filter sequences by ID", + "outputs": [ + { + "name": "output_pos", + "type": "fasta" + }, + { + "name": "output_neg", + "type": "fasta" + } + ], + "position": { + "left": 893, + "top": 281 + }, + "post_job_actions": { + "HideDatasetActionoutput_neg": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_neg" + }, + "RenameDatasetActionoutput_pos": { + "action_arguments": { + "newname": "Secreted proteins" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_pos" + } + }, + "tool_errors": null, + "tool_id": "seq_filter_by_id", + "tool_state": "{\"__page__\": 0, \"output_choice_cond\": \"{\\\"output_choice\\\": \\\"pos\\\", \\\"__current_case__\\\": 1}\", \"input_file\": \"null\", \"input_tabular\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"columns\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"1\\\"]}\"}", + "tool_version": "0.0.1", + "type": "tool", + "user_outputs": [] + } + } +} \ No newline at end of file