Repository 'find_three_genes_located_nearby_workflow'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/find_three_genes_located_nearby_workflow

Changeset 0:e42f2c5118ac (2015-03-17)
Next changeset 1:12a1efdaeb5b (2015-03-17)
Commit message:
Imported from capsule None
added:
P61920.fasta
P61921.fasta
Q6LDH1.fasta
find_three_genes_located_nearby.ga
find_three_genes_located_nearby.png
readme.rst
repository_dependencies.xml
b
diff -r 000000000000 -r e42f2c5118ac P61920.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/P61920.fasta Tue Mar 17 09:40:46 2015 -0400
b
@@ -0,0 +1,4 @@
+>sp|P61920|HBG1_PANTR Hemoglobin subunit gamma-1 OS=Pan troglodytes GN=HBG1 PE=1 SV=2
+MGHFTEEDKATITSLWGKVNVEDAGGETLGRLLVVYPWTQRFFDSFGNLSSASAIMGNPK
+VKAHGKKVLTSLGDAIKHLDDLKGTFAQLSELHCDKLHVDPENFKLLGNVLVTVLAIHFG
+KEFTPEVQASWQKMVTAVASALSSRYH
b
diff -r 000000000000 -r e42f2c5118ac P61921.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/P61921.fasta Tue Mar 17 09:40:46 2015 -0400
b
@@ -0,0 +1,4 @@
+>sp|P61921|HBG2_PANTR Hemoglobin subunit gamma-2 OS=Pan troglodytes GN=HBG2 PE=1 SV=2
+MGHFTEEDKATITSLWGKVNVEDAGGETLGRLLVVYPWTQRFFDSFGNLSSASAIMGNPK
+VKAHGKKVLTSLGDAIKHLDDLKGTFAQLSELHCDKLHVDPENFKLLGNVLVTVLAIHFG
+KEFTPEVQASWQKMVTGVASALSSRYH
b
diff -r 000000000000 -r e42f2c5118ac Q6LDH1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Q6LDH1.fasta Tue Mar 17 09:40:46 2015 -0400
b
@@ -0,0 +1,4 @@
+>sp|Q6LDH1|HBE_PANTR Hemoglobin subunit epsilon OS=Pan troglodytes GN=HBE1 PE=2 SV=3
+MVHFTAEEKAAVTSLWSKMNVEEAGGEALGRLLVVYPWTQRFFDSFGNLSSPSAILGNPK
+VKAHGKKVLTSFGDAIKNMDNLKPAFAKLSELHCDKLHVDPENFKLLGNVMVIILATHFG
+KEFTPEVQAAWQKLVSAVAIALAHKYH
b
diff -r 000000000000 -r e42f2c5118ac find_three_genes_located_nearby.ga
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/find_three_genes_located_nearby.ga Tue Mar 17 09:40:46 2015 -0400
[
b'@@ -0,0 +1,965 @@\n+{\n+    "a_galaxy_workflow": "true", \n+    "annotation": "Input are two protein genes in FASTA format. They will be blasted with tblastn against the NCBI NR database. Both results are compared and hits are returned that are close to each other.", \n+    "format-version": "0.1", \n+    "name": "Finding 3 genes close to each other", \n+    "steps": {\n+        "0": {\n+            "annotation": "", \n+            "id": 0, \n+            "input_connections": {}, \n+            "inputs": [\n+                {\n+                    "description": "", \n+                    "name": "Nucleotide Reference Database"\n+                }\n+            ], \n+            "name": "Input dataset", \n+            "outputs": [], \n+            "position": {\n+                "left": 289, \n+                "top": 247\n+            }, \n+            "tool_errors": null, \n+            "tool_id": null, \n+            "tool_state": "{\\"name\\": \\"Nucleotide Reference Database\\"}", \n+            "tool_version": null, \n+            "type": "data_input", \n+            "user_outputs": []\n+        }, \n+        "1": {\n+            "annotation": "", \n+            "id": 1, \n+            "input_connections": {}, \n+            "inputs": [\n+                {\n+                    "description": "", \n+                    "name": "Protein in FASTA format"\n+                }\n+            ], \n+            "name": "Input dataset", \n+            "outputs": [], \n+            "position": {\n+                "left": 248.38333129882812, \n+                "top": 474.3833312988281\n+            }, \n+            "tool_errors": null, \n+            "tool_id": null, \n+            "tool_state": "{\\"name\\": \\"Protein in FASTA format\\"}", \n+            "tool_version": null, \n+            "type": "data_input", \n+            "user_outputs": []\n+        }, \n+        "2": {\n+            "annotation": "", \n+            "id": 2, \n+            "input_connections": 
{}, \n+            "inputs": [\n+                {\n+                    "description": "", \n+                    "name": "Protein in FASTA format"\n+                }\n+            ], \n+            "name": "Input dataset", \n+            "outputs": [], \n+            "position": {\n+                "left": 250.38333129882812, \n+                "top": 545.3833312988281\n+            }, \n+            "tool_errors": null, \n+            "tool_id": null, \n+            "tool_state": "{\\"name\\": \\"Protein in FASTA format\\"}", \n+            "tool_version": null, \n+            "type": "data_input", \n+            "user_outputs": []\n+        }, \n+        "3": {\n+            "annotation": "", \n+            "id": 3, \n+            "input_connections": {}, \n+            "inputs": [\n+                {\n+                    "description": "", \n+                    "name": "Protein in FASTA format"\n+                }\n+            ], \n+            "name": "Input dataset", \n+            "outputs": [], \n+            "position": {\n+                "left": 259.1166687011719, \n+                "top": 754.61669921875\n+            }, \n+            "tool_errors": null, \n+            "tool_id": null, \n+            "tool_state": "{\\"name\\": \\"Protein in FASTA format\\"}", \n+            "tool_version": null, \n+            "type": "data_input", \n+            "user_outputs": []\n+        }, \n+        "4": {\n+            "annotation": "", \n+            "id": 4, \n+            "input_connections": {\n+                "input_file": {\n+                    "id": 0, \n+                    "output_name": "output"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "NCBI BLAST+ makeblastdb", \n+            "outputs": [\n+                {\n+                    "name": "outfile", \n+                    "type": "data"\n+                }\n+            ], \n+            "position": {\n+                "left": 526.5, \n+          
      "top": 196\n+            }, \n+            "post_job_actions": {\n+                "HideDatasetActionoutfile": {\n+                  '..b'               "output_name": "output"\n+                }, \n+                "input2": {\n+                    "id": 23, \n+                    "output_name": "out_file1"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "Intersect", \n+            "outputs": [\n+                {\n+                    "name": "output", \n+                    "type": "input"\n+                }\n+            ], \n+            "position": {\n+                "left": 1137.11669921875, \n+                "top": 755.61669921875\n+            }, \n+            "post_job_actions": {}, \n+            "tool_errors": null, \n+            "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/intersect/gops_intersect_1/1.0.0", \n+            "tool_state": "{\\"input2\\": \\"null\\", \\"__page__\\": 0, \\"input1\\": \\"null\\", \\"min\\": \\"\\\\\\"1\\\\\\"\\", \\"__rerun_remap_job_id__\\": null, \\"returntype\\": \\"\\\\\\"\\\\\\"\\"}", \n+            "tool_version": "1.0.0", \n+            "type": "tool", \n+            "user_outputs": []\n+        }, \n+        "26": {\n+            "annotation": "", \n+            "id": 26, \n+            "input_connections": {\n+                "input": {\n+                    "id": 25, \n+                    "output_name": "output"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "Sort", \n+            "outputs": [\n+                {\n+                    "name": "out_file1", \n+                    "type": "input"\n+                }\n+            ], \n+            "position": {\n+                "left": 1216.11669921875, \n+                "top": 648.1166687011719\n+            }, \n+            "post_job_actions": {}, \n+            "tool_errors": null, \n+            "tool_id": "sort1", \n+            "tool_state": "{\\"__page__\\": 
0, \\"style\\": \\"\\\\\\"alpha\\\\\\"\\", \\"column\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"UnvalidatedValue\\\\\\", \\\\\\"value\\\\\\": \\\\\\"1\\\\\\"}\\", \\"__rerun_remap_job_id__\\": null, \\"order\\": \\"\\\\\\"DESC\\\\\\"\\", \\"input\\": \\"null\\", \\"column_set\\": \\"[{\\\\\\"other_order\\\\\\": \\\\\\"DESC\\\\\\", \\\\\\"__index__\\\\\\": 0, \\\\\\"other_column\\\\\\": {\\\\\\"__class__\\\\\\": \\\\\\"UnvalidatedValue\\\\\\", \\\\\\"value\\\\\\": \\\\\\"2\\\\\\"}, \\\\\\"other_style\\\\\\": \\\\\\"num\\\\\\"}]\\"}", \n+            "tool_version": "1.0.3", \n+            "type": "tool", \n+            "user_outputs": []\n+        }, \n+        "27": {\n+            "annotation": "", \n+            "id": 27, \n+            "input_connections": {\n+                "input1": {\n+                    "id": 26, \n+                    "output_name": "out_file1"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "Group", \n+            "outputs": [\n+                {\n+                    "name": "out_file1", \n+                    "type": "tabular"\n+                }\n+            ], \n+            "position": {\n+                "left": 1396.88330078125, \n+                "top": 648.3833312988281\n+            }, \n+            "post_job_actions": {\n+                "RenameDatasetActionout_file1": {\n+                    "action_arguments": {\n+                        "newname": "Counting hits per genome"\n+                    }, \n+                    "action_type": "RenameDatasetAction", \n+                    "output_name": "out_file1"\n+                }\n+            }, \n+            "tool_errors": null, \n+            "tool_id": "Grouping1", \n+            "tool_state": "{\\"operations\\": \\"[{\\\\\\"opcol\\\\\\": {\\\\\\"__class__\\\\\\": \\\\\\"UnvalidatedValue\\\\\\", \\\\\\"value\\\\\\": \\\\\\"1\\\\\\"}, \\\\\\"__index__\\\\\\": 0, \\\\\\"optype\\\\\\": \\\\\\"length\\\\\\", \\\\\\"opround\\\\\\": 
\\\\\\"no\\\\\\"}]\\", \\"__page__\\": 0, \\"input1\\": \\"null\\", \\"ignorelines\\": \\"null\\", \\"groupcol\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"UnvalidatedValue\\\\\\", \\\\\\"value\\\\\\": \\\\\\"1\\\\\\"}\\", \\"__rerun_remap_job_id__\\": null, \\"ignorecase\\": \\"\\\\\\"False\\\\\\"\\"}", \n+            "tool_version": "2.1.0", \n+            "type": "tool", \n+            "user_outputs": []\n+        }\n+    }, \n+    "uuid": "6db7ece4-0473-4fc6-a156-105186ffee7b"\n+}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r e42f2c5118ac find_three_genes_located_nearby.png
b
Binary file find_three_genes_located_nearby.png has changed
b
diff -r 000000000000 -r e42f2c5118ac readme.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst Tue Mar 17 09:40:46 2015 -0400
b
@@ -0,0 +1,90 @@
+Galaxy workflow for the identification of candidate gene clusters
+------------------------------------------------------------------
+
+This approach screens three proteins against a given genome sequence, identifying genome positions
+where all three genes are located nearby. As usual in Galaxy workflows, every
+parameter, including the proximity distance, can be changed and additional steps
+can be easily added. For example, additional filtering to refine the initial BLAST
+hits, or inclusion of a fourth query sequence.
+
+.. image:: https://raw.githubusercontent.com/bgruening/galaxytools/master/workflows/ncbi_blast_plus/find_three_genes_located_nearby/find_three_genes_located_nearby.png
+
+
+Sample Data
+===========
+
+As an example, we will use three protein sequences from *Pan troglodytes* (Chimpanzee)
+which are part of the β-globin cluster.
+
+You can upload all sequences directly into Galaxy using the "Upload tool"
+with any of these URLs - Galaxy should recognise them as FASTA files.
+
+Query sequences:
+* `P61920.fasta <https://raw.githubusercontent.com/bgruening/galaxytools/master/workflows/ncbi_blast_plus/find_three_genes_located_nearby/P61920.fasta>`_
+* `P61921.fasta <https://raw.githubusercontent.com/bgruening/galaxytools/master/workflows/ncbi_blast_plus/find_three_genes_located_nearby/P61921.fasta>`_
+* `Q6LDH1.fasta <https://raw.githubusercontent.com/bgruening/galaxytools/master/workflows/ncbi_blast_plus/find_three_genes_located_nearby/Q6LDH1.fasta>`_
+
+Genome sequence:
+* http://hgdownload.cse.ucsc.edu/goldenPath/rn6/bigZips/rn6.fa
+
+
+In addition you can find the query sequences at the UniProt server:
+ * http://www.uniprot.org/uniprot/P61920 (Hemoglobin subunit gamma-1)
+   ::
+
+     >sp|P61920|HBG1_PANTR Hemoglobin subunit gamma-1 OS=Pan troglodytes GN=HBG1 PE=1 SV=2
+     MGHFTEEDKATITSLWGKVNVEDAGGETLGRLLVVYPWTQRFFDSFGNLSSASAIMGNPK
+     VKAHGKKVLTSLGDAIKHLDDLKGTFAQLSELHCDKLHVDPENFKLLGNVLVTVLAIHFG
+     KEFTPEVQASWQKMVTAVASALSSRYH
+
+
+ * http://www.uniprot.org/uniprot/P61921 (Hemoglobin subunit gamma-2)
+   ::
+
+     >sp|P61921|HBG2_PANTR Hemoglobin subunit gamma-2 OS=Pan troglodytes GN=HBG2 PE=1 SV=2
+     MGHFTEEDKATITSLWGKVNVEDAGGETLGRLLVVYPWTQRFFDSFGNLSSASAIMGNPK
+     VKAHGKKVLTSLGDAIKHLDDLKGTFAQLSELHCDKLHVDPENFKLLGNVLVTVLAIHFG
+     KEFTPEVQASWQKMVTGVASALSSRYH
+
+
+ * http://www.uniprot.org/uniprot/Q6LDH1 (Hemoglobin subunit epsilon)
+   ::
+
+     >sp|Q6LDH1|HBE_PANTR Hemoglobin subunit epsilon OS=Pan troglodytes GN=HBE1 PE=2 SV=3
+     MVHFTAEEKAAVTSLWSKMNVEEAGGEALGRLLVVYPWTQRFFDSFGNLSSPSAILGNPK
+     VKAHGKKVLTSFGDAIKNMDNLKPAFAKLSELHCDKLHVDPENFKLLGNVMVIILATHFG
+     KEFTPEVQAAWQKLVSAVAIALAHKYH
+
+
+Citation
+========
+
+If you use this workflow directly, or a derivative of it, or the associated
+NCBI BLAST wrappers for Galaxy, in work leading to a scientific publication,
+please cite:
+
+Peter J. A. Cock, John M. Chilton, Björn Grüning, James E. Johnson, Nicola Soranzo
+NCBI BLAST+ integrated into Galaxy
+
+* http://biorxiv.org/content/early/2015/01/21/014043
+* http://dx.doi.org/10.1101/014043
+
+
+Availability
+============
+
+This workflow is available on the main Galaxy Tool Shed:
+
+http://toolshed.g2.bx.psu.edu/view/bgruening/find_three_genes_located_nearby_workflow
+
+Development is being done on GitHub:
+
+https://github.com/bgruening/galaxytools/tree/master/workflows/ncbi_blast_plus/find_three_genes_located_nearby
+
+
+Dependencies
+============
+
+These dependencies should be resolved automatically via the Galaxy Tool Shed:
+
+* http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
b
diff -r 000000000000 -r e42f2c5118ac repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Tue Mar 17 09:40:46 2015 -0400
b
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="This workflow requires the NCBI BLAST tools.">
+  <repository changeset_revision="2fe07f50a41e" name="ncbi_blast_plus" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+</repositories>