Repository 'proteomics_rnaseq_splice_db_workflow'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/proteomics_rnaseq_splice_db_workflow

Changeset 0:2f3cb6bae4e9 (2014-03-17)
Next changeset 1:c2e58e1c045d (2014-03-17)
Commit message:
Initial upload
added:
README.rst
proteomics_rnaseq_splice_db_workflow.ga
repository_dependencies.xml
b
diff -r 000000000000 -r 2f3cb6bae4e9 README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Mon Mar 17 09:19:07 2014 -0500
[
@@ -0,0 +1,46 @@
+This package is a Galaxy workflow for the detection and incorporation of novel splice sequences into custom splice-junction databases.
+
+The splice database workflow first aligns RNA-Seq data to the genome twice, first to only those splice junctions found in the Ensembl gene models and second to both the Ensembl gene models and reference genome.  The output BED files, which contain the coordinates of all detected junctions, are compared and only those coordinates for splice junctions not present in the gene models are retrieved.  Next, the genomic sequence for each splice junction is retrieved.  We developed a program, "Translate BED sequences", which translates the splice junctions and compiles all splice-junction polypeptide sequences of the user's choice.  The user may choose to filter out splice junction entries that contain stop codons, are shorter than a certain length, and are below a certain expression level measured by splice-junction RNA-Seq read depth.
+
+See http://www.galaxyproject.org for information about the Galaxy Project.
+
+
+Availability
+============
+
+This workflow is available to download and/or install from the main
+Galaxy Tool Shed:
+
+http://toolshed.g2.bx.psu.edu/view/galaxyp/proteomics_rnaseq_splice_db_workflow
+
+
+Reference Data
+==============
+
+For human RNA-Seq data, this workflow was tested using reference data from:
+
+* ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.73.dna.chromosome.[1-9XY]*.fa.gz
+* ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/pep/Homo_sapiens.GRCh37.73.pep.all.fa.gz
+* ftp://ftp.ensembl.org/pub/release-73/gtf/homo_sapiens/Homo_sapiens.GRCh37.73.gtf.gz
+
+
+Dependencies
+============
+
+These dependencies should be resolved automatically via the Galaxy Tool Shed:
+
+* http://toolshed.g2.bx.psu.edu/view/devteam/tophat
+* http://toolshed.g2.bx.psu.edu/view/jjohnson/filter_bed_on_splice_junctions
+* http://toolshed.g2.bx.psu.edu/view/jjohnson/translate_bed_sequences
+
+
+History
+=======
+
+======= ======================================================================
+Version Changes
+------- ----------------------------------------------------------------------
+v0.0.1  - Initial release to Tool Shed (March, 2014)
+======= ======================================================================
+
+
b
diff -r 000000000000 -r 2f3cb6bae4e9 proteomics_rnaseq_splice_db_workflow.ga
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/proteomics_rnaseq_splice_db_workflow.ga Mon Mar 17 09:19:07 2014 -0500
[
b'@@ -0,0 +1,394 @@\n+{\n+    "a_galaxy_workflow": "true", \n+    "annotation": "Create a peptide fasta database with novel splice junctions that are inferred from RNAseq data", \n+    "format-version": "0.1", \n+    "name": "Proteomics Splice DB", \n+    "steps": {\n+        "0": {\n+            "annotation": "", \n+            "id": 0, \n+            "input_connections": {}, \n+            "inputs": [\n+                {\n+                    "description": "", \n+                    "name": "RNA-Seq left mate pair fastq"\n+                }\n+            ], \n+            "name": "Input dataset", \n+            "outputs": [], \n+            "position": {\n+                "left": 207, \n+                "top": 429\n+            }, \n+            "tool_errors": null, \n+            "tool_id": null, \n+            "tool_state": "{\\"name\\": \\"RNA-Seq left mate pair fastq\\"}", \n+            "tool_version": null, \n+            "type": "data_input", \n+            "user_outputs": []\n+        }, \n+        "1": {\n+            "annotation": "", \n+            "id": 1, \n+            "input_connections": {}, \n+            "inputs": [\n+                {\n+                    "description": "", \n+                    "name": "RNA-Seq right mate pair fastq"\n+                }\n+            ], \n+            "name": "Input dataset", \n+            "outputs": [], \n+            "position": {\n+                "left": 205, \n+                "top": 639\n+            }, \n+            "tool_errors": null, \n+            "tool_id": null, \n+            "tool_state": "{\\"name\\": \\"RNA-Seq right mate pair fastq\\"}", \n+            "tool_version": null, \n+            "type": "data_input", \n+            "user_outputs": []\n+        }, \n+        "2": {\n+            "annotation": "", \n+            "id": 2, \n+            "input_connections": {}, \n+            "inputs": [\n+                {\n+                    "description": "", \n+                    "name": 
"Reference Genome FASTA file"\n+                }\n+            ], \n+            "name": "Input dataset", \n+            "outputs": [], \n+            "position": {\n+                "left": 862, \n+                "top": 558\n+            }, \n+            "tool_errors": null, \n+            "tool_id": null, \n+            "tool_state": "{\\"name\\": \\"Reference Genome FASTA file\\"}", \n+            "tool_version": null, \n+            "type": "data_input", \n+            "user_outputs": []\n+        }, \n+        "3": {\n+            "annotation": "", \n+            "id": 3, \n+            "input_connections": {\n+                "input": {\n+                    "id": 0, \n+                    "output_name": "output"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "Select first", \n+            "outputs": [\n+                {\n+                    "name": "out_file1", \n+                    "type": "input"\n+                }\n+            ], \n+            "position": {\n+                "left": 447, \n+                "top": 272\n+            }, \n+            "post_job_actions": {\n+                "HideDatasetActionout_file1": {\n+                    "action_arguments": {}, \n+                    "action_type": "HideDatasetAction", \n+                    "output_name": "out_file1"\n+                }\n+            }, \n+            "tool_errors": null, \n+            "tool_id": "Show beginning1", \n+            "tool_state": "{\\"__page__\\": 0, \\"input\\": \\"null\\", \\"__rerun_remap_job_id__\\": null, \\"chromInfo\\": \\"\\\\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/hg19.len\\\\\\"\\", \\"lineNum\\": \\"\\\\\\"100000\\\\\\"\\"}", \n+            "tool_version": "1.0.0", \n+            "type": "tool", \n+            "user_outputs": []\n+        }, \n+        "4": {\n+            "annotation": "", \n+            "id": 4, \n+            "input_connections": {\n+                "input": 
{\n+                    "id": 1, \n+                    "output_name": "output"\n+                }\n+            }, \n+            "inputs": [], \n+            "name"'..b'   }, \n+            "post_job_actions": {\n+                "HideDatasetActionnovel_junctions": {\n+                    "action_arguments": {}, \n+                    "action_type": "HideDatasetAction", \n+                    "output_name": "novel_junctions"\n+                }\n+            }, \n+            "tool_errors": null, \n+            "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/filter_bed_on_splice_junctions/filter_bed_on_splice_junctions/0.0.1", \n+            "tool_state": "{\\"__page__\\": 0, \\"input_bed\\": \\"null\\", \\"__rerun_remap_job_id__\\": null, \\"leading_bp\\": \\"\\\\\\"66\\\\\\"\\", \\"guided_junctions\\": \\"null\\", \\"trailing_bp\\": \\"\\\\\\"66\\\\\\"\\", \\"chromInfo\\": \\"\\\\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCh37_canon.len\\\\\\"\\"}", \n+            "tool_version": "0.0.1", \n+            "type": "tool", \n+            "user_outputs": []\n+        }, \n+        "8": {\n+            "annotation": "", \n+            "id": 8, \n+            "input_connections": {\n+                "input": {\n+                    "id": 7, \n+                    "output_name": "novel_junctions"\n+                }, \n+                "seq_source|ref_file": {\n+                    "id": 2, \n+                    "output_name": "output"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "Extract Genomic DNA", \n+            "outputs": [\n+                {\n+                    "name": "out_file1", \n+                    "type": "input"\n+                }\n+            ], \n+            "position": {\n+                "left": 1499, \n+                "top": 668\n+            }, \n+            "post_job_actions": {}, \n+            "tool_errors": null, \n+            "tool_id": "Extract genomic DNA 
1", \n+            "tool_state": "{\\"out_format\\": \\"\\\\\\"interval\\\\\\"\\", \\"__page__\\": 0, \\"interpret_features\\": \\"\\\\\\"no\\\\\\"\\", \\"__rerun_remap_job_id__\\": null, \\"seq_source\\": \\"{\\\\\\"index_source\\\\\\": \\\\\\"history\\\\\\", \\\\\\"ref_file\\\\\\": null, \\\\\\"__current_case__\\\\\\": 1}\\", \\"input\\": \\"null\\", \\"chromInfo\\": \\"\\\\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCh37_canon.len\\\\\\"\\"}", \n+            "tool_version": "2.2.3", \n+            "type": "tool", \n+            "user_outputs": []\n+        }, \n+        "9": {\n+            "annotation": "", \n+            "id": 9, \n+            "input_connections": {\n+                "input": {\n+                    "id": 8, \n+                    "output_name": "out_file1"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "Translate BED Sequences", \n+            "outputs": [\n+                {\n+                    "name": "output", \n+                    "type": "fasta"\n+                }\n+            ], \n+            "position": {\n+                "left": 1763, \n+                "top": 581\n+            }, \n+            "post_job_actions": {\n+                "HideDatasetActionoutput": {\n+                    "action_arguments": {}, \n+                    "action_type": "HideDatasetAction", \n+                    "output_name": "output"\n+                }\n+            }, \n+            "tool_errors": null, \n+            "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/translate_bed_sequences/translate_bed_sequences/0.0.1", \n+            "tool_state": "{\\"trim\\": \\"{\\\\\\"trimseqs\\\\\\": \\\\\\"yes\\\\\\", \\\\\\"__current_case__\\\\\\": 1}\\", \\"min_length\\": \\"\\\\\\"10\\\\\\"\\", \\"reference\\": \\"\\\\\\"\\\\\\"\\", \\"__page__\\": 0, \\"__rerun_remap_job_id__\\": null, \\"filter\\": \\"{\\\\\\"filterseqs\\\\\\": \\\\\\"yes\\\\\\", \\\\\\"trailing_bp\\\\\\": 
\\\\\\"66\\\\\\", \\\\\\"leading_bp\\\\\\": \\\\\\"66\\\\\\", \\\\\\"__current_case__\\\\\\": 0}\\", \\"score_name\\": \\"\\\\\\"depth\\\\\\"\\", \\"input\\": \\"null\\", \\"seqtype\\": \\"\\\\\\"pep:splice\\\\\\"\\", \\"chromInfo\\": \\"\\\\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCh37_canon.len\\\\\\"\\"}", \n+            "tool_version": "0.0.1", \n+            "type": "tool", \n+            "user_outputs": []\n+        }\n+    }\n+}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 2f3cb6bae4e9 repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Mon Mar 17 09:19:07 2014 -0500
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<repositories description="Required tools for proteomics_rnaseq_splice_db_workflow">
+    <repository name="tophat" owner="devteam" />
+    <repository name="filter_bed_on_splice_junctions" owner="jjohnson" />
+    <repository name="translate_bed_sequences" owner="jjohnson" />
+</repositories>