changeset 0:2f3cb6bae4e9

Initial upload
author Jim Johnson <jj@umn.edu>
date Mon, 17 Mar 2014 09:19:07 -0500
parents
children c2e58e1c045d
files README.rst proteomics_rnaseq_splice_db_workflow.ga repository_dependencies.xml
diffstat 3 files changed, 446 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Mon Mar 17 09:19:07 2014 -0500
@@ -0,0 +1,46 @@
+This package is a Galaxy workflow for the detection and incorporation of novel splice sequences into custom splice-junction databases.
+
+The splice database workflow first aligns RNA-Seq data to the genome twice, first to only those splice junctions found in the Ensembl gene models and second to both the Ensembl gene models and the reference genome.  The output BED files, which contain the coordinates of all detected junctions, are compared and only those coordinates for splice junctions not present in the gene models are retrieved.  Next, the genomic sequence for each splice junction is retrieved.  We developed a program, "Translate BED Sequences", which translates the splice junctions and compiles all splice-junction polypeptide sequences of the user's choice.  The user may choose to filter out splice junction entries that contain stop codons, are shorter than a certain length, or are below a certain expression level as measured by splice-junction RNA-Seq read depth.
+
+See http://www.galaxyproject.org for information about the Galaxy Project.
+
+
+Availability
+============
+
+This workflow is available to download and/or install from the main
+Galaxy Tool Shed:
+
+http://toolshed.g2.bx.psu.edu/view/galaxyp/proteomics_rnaseq_splice_db_workflow
+
+
+Reference Data
+==============
+
+For human RNA-Seq data, this workflow was tested using reference data from:
+
+* ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.73.dna.chromosome.[1-9XY]*.fa.gz
+* ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/pep/Homo_sapiens.GRCh37.73.pep.all.fa.gz
+* ftp://ftp.ensembl.org/pub/release-73/gtf/homo_sapiens/Homo_sapiens.GRCh37.73.gtf.gz
+
+
+Dependencies
+============
+
+These dependencies should be resolved automatically via the Galaxy Tool Shed:
+
+* http://toolshed.g2.bx.psu.edu/view/devteam/tophat
+* http://toolshed.g2.bx.psu.edu/view/jjohnson/filter_bed_on_splice_junctions
+* http://toolshed.g2.bx.psu.edu/view/jjohnson/translate_bed_sequences
+
+
+History
+=======
+
+======= ======================================================================
+Version Changes
+------- ----------------------------------------------------------------------
+v0.0.1  - Initial release to Tool Shed (March, 2014)
+======= ======================================================================
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/proteomics_rnaseq_splice_db_workflow.ga	Mon Mar 17 09:19:07 2014 -0500
@@ -0,0 +1,394 @@
+{
+    "a_galaxy_workflow": "true", 
+    "annotation": "Create a peptide fasta database with novel splice junctions that are inferred from RNAseq data", 
+    "format-version": "0.1", 
+    "name": "Proteomics Splice DB", 
+    "steps": {
+        "0": {
+            "annotation": "", 
+            "id": 0, 
+            "input_connections": {}, 
+            "inputs": [
+                {
+                    "description": "", 
+                    "name": "RNA-Seq left mate pair fastq"
+                }
+            ], 
+            "name": "Input dataset", 
+            "outputs": [], 
+            "position": {
+                "left": 207, 
+                "top": 429
+            }, 
+            "tool_errors": null, 
+            "tool_id": null, 
+            "tool_state": "{\"name\": \"RNA-Seq left mate pair fastq\"}", 
+            "tool_version": null, 
+            "type": "data_input", 
+            "user_outputs": []
+        }, 
+        "1": {
+            "annotation": "", 
+            "id": 1, 
+            "input_connections": {}, 
+            "inputs": [
+                {
+                    "description": "", 
+                    "name": "RNA-Seq right mate pair fastq"
+                }
+            ], 
+            "name": "Input dataset", 
+            "outputs": [], 
+            "position": {
+                "left": 205, 
+                "top": 639
+            }, 
+            "tool_errors": null, 
+            "tool_id": null, 
+            "tool_state": "{\"name\": \"RNA-Seq right mate pair fastq\"}", 
+            "tool_version": null, 
+            "type": "data_input", 
+            "user_outputs": []
+        }, 
+        "2": {
+            "annotation": "", 
+            "id": 2, 
+            "input_connections": {}, 
+            "inputs": [
+                {
+                    "description": "", 
+                    "name": "Reference Genome FASTA file"
+                }
+            ], 
+            "name": "Input dataset", 
+            "outputs": [], 
+            "position": {
+                "left": 862, 
+                "top": 558
+            }, 
+            "tool_errors": null, 
+            "tool_id": null, 
+            "tool_state": "{\"name\": \"Reference Genome FASTA file\"}", 
+            "tool_version": null, 
+            "type": "data_input", 
+            "user_outputs": []
+        }, 
+        "3": {
+            "annotation": "", 
+            "id": 3, 
+            "input_connections": {
+                "input": {
+                    "id": 0, 
+                    "output_name": "output"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Select first", 
+            "outputs": [
+                {
+                    "name": "out_file1", 
+                    "type": "input"
+                }
+            ], 
+            "position": {
+                "left": 447, 
+                "top": 272
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionout_file1": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "out_file1"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "Show beginning1", 
+            "tool_state": "{\"__page__\": 0, \"input\": \"null\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"lineNum\": \"\\\"100000\\\"\"}", 
+            "tool_version": "1.0.0", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "4": {
+            "annotation": "", 
+            "id": 4, 
+            "input_connections": {
+                "input": {
+                    "id": 1, 
+                    "output_name": "output"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Select first", 
+            "outputs": [
+                {
+                    "name": "out_file1", 
+                    "type": "input"
+                }
+            ], 
+            "position": {
+                "left": 445, 
+                "top": 737
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionout_file1": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "out_file1"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "Show beginning1", 
+            "tool_state": "{\"__page__\": 0, \"input\": \"null\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"lineNum\": \"\\\"100000\\\"\"}", 
+            "tool_version": "1.0.0", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "5": {
+            "annotation": "", 
+            "id": 5, 
+            "input_connections": {
+                "input1": {
+                    "id": 3, 
+                    "output_name": "out_file1"
+                }, 
+                "refGenomeSource|ownFile": {
+                    "id": 2, 
+                    "output_name": "output"
+                }, 
+                "singlePaired|input2": {
+                    "id": 4, 
+                    "output_name": "out_file1"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Tophat for Illumina", 
+            "outputs": [
+                {
+                    "name": "insertions", 
+                    "type": "bed"
+                }, 
+                {
+                    "name": "deletions", 
+                    "type": "bed"
+                }, 
+                {
+                    "name": "junctions", 
+                    "type": "bed"
+                }, 
+                {
+                    "name": "accepted_hits", 
+                    "type": "bam"
+                }
+            ], 
+            "position": {
+                "left": 1049, 
+                "top": 206
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionaccepted_hits": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "accepted_hits"
+                }, 
+                "HideDatasetActiondeletions": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "deletions"
+                }, 
+                "HideDatasetActioninsertions": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "insertions"
+                }, 
+                "HideDatasetActionjunctions": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "junctions"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/tophat/tophat/1.5.0", 
+            "tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"singlePaired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", \\\"pParams\\\": {\\\"library_type\\\": \\\"fr-unstranded\\\", \\\"closure_search\\\": {\\\"use_search\\\": \\\"No\\\", \\\"__current_case__\\\": 1}, \\\"__current_case__\\\": 1, \\\"indel_search\\\": {\\\"max_insertion_length\\\": \\\"3\\\", \\\"max_deletion_length\\\": \\\"3\\\", \\\"__current_case__\\\": 1, \\\"allow_indel_search\\\": \\\"Yes\\\"}, \\\"seg_length\\\": \\\"25\\\", \\\"max_intron_length\\\": \\\"500000\\\", \\\"pSettingsType\\\": \\\"full\\\", \\\"min_intron_length\\\": \\\"70\\\", \\\"microexon_search\\\": \\\"No\\\", \\\"min_segment_intron\\\": \\\"50\\\", \\\"max_multihits\\\": \\\"20\\\", \\\"coverage_search\\\": {\\\"use_search\\\": \\\"No\\\", \\\"__current_case__\\\": 1}, \\\"splice_mismatches\\\": \\\"0\\\", \\\"anchor_length\\\": \\\"5\\\", \\\"mate_std_dev\\\": \\\"20\\\", \\\"own_junctions\\\": {\\\"gene_model_ann\\\": {\\\"gene_annotation_model\\\": null, \\\"use_annotations\\\": \\\"Yes\\\", \\\"__current_case__\\\": 1}, \\\"no_novel_juncs\\\": \\\"No\\\", \\\"use_junctions\\\": \\\"Yes\\\", \\\"__current_case__\\\": 0, \\\"raw_juncs\\\": {\\\"use_juncs\\\": \\\"No\\\", \\\"__current_case__\\\": 0}}, \\\"seg_mismatches\\\": \\\"2\\\", \\\"initial_read_mismatches\\\": \\\"2\\\", \\\"max_segment_intron\\\": \\\"500000\\\"}, \\\"__current_case__\\\": 1, \\\"mate_inner_distance\\\": \\\"150\\\"}\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCh37_canon.len\\\"\", \"__rerun_remap_job_id__\": null, \"refGenomeSource\": \"{\\\"genomeSource\\\": \\\"history\\\", \\\"ownFile\\\": null, \\\"__current_case__\\\": 1}\"}", 
+            "tool_version": "1.5.0", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "6": {
+            "annotation": "", 
+            "id": 6, 
+            "input_connections": {
+                "input1": {
+                    "id": 3, 
+                    "output_name": "out_file1"
+                }, 
+                "refGenomeSource|ownFile": {
+                    "id": 2, 
+                    "output_name": "output"
+                }, 
+                "singlePaired|input2": {
+                    "id": 4, 
+                    "output_name": "out_file1"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Tophat for Illumina", 
+            "outputs": [
+                {
+                    "name": "insertions", 
+                    "type": "bed"
+                }, 
+                {
+                    "name": "deletions", 
+                    "type": "bed"
+                }, 
+                {
+                    "name": "junctions", 
+                    "type": "bed"
+                }, 
+                {
+                    "name": "accepted_hits", 
+                    "type": "bam"
+                }
+            ], 
+            "position": {
+                "left": 1057, 
+                "top": 755
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionaccepted_hits": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "accepted_hits"
+                }, 
+                "HideDatasetActiondeletions": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "deletions"
+                }, 
+                "HideDatasetActioninsertions": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "insertions"
+                }, 
+                "HideDatasetActionjunctions": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "junctions"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/tophat/tophat/1.5.0", 
+            "tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"singlePaired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", \\\"pParams\\\": {\\\"library_type\\\": \\\"fr-unstranded\\\", \\\"closure_search\\\": {\\\"use_search\\\": \\\"No\\\", \\\"__current_case__\\\": 1}, \\\"__current_case__\\\": 1, \\\"indel_search\\\": {\\\"max_insertion_length\\\": \\\"3\\\", \\\"max_deletion_length\\\": \\\"3\\\", \\\"__current_case__\\\": 1, \\\"allow_indel_search\\\": \\\"Yes\\\"}, \\\"seg_length\\\": \\\"25\\\", \\\"max_intron_length\\\": \\\"500000\\\", \\\"pSettingsType\\\": \\\"full\\\", \\\"min_intron_length\\\": \\\"70\\\", \\\"microexon_search\\\": \\\"No\\\", \\\"min_segment_intron\\\": \\\"50\\\", \\\"max_multihits\\\": \\\"20\\\", \\\"coverage_search\\\": {\\\"use_search\\\": \\\"No\\\", \\\"__current_case__\\\": 1}, \\\"splice_mismatches\\\": \\\"0\\\", \\\"anchor_length\\\": \\\"5\\\", \\\"mate_std_dev\\\": \\\"20\\\", \\\"own_junctions\\\": {\\\"gene_model_ann\\\": {\\\"gene_annotation_model\\\": null, \\\"use_annotations\\\": \\\"Yes\\\", \\\"__current_case__\\\": 1}, \\\"no_novel_juncs\\\": \\\"Yes\\\", \\\"use_junctions\\\": \\\"Yes\\\", \\\"__current_case__\\\": 0, \\\"raw_juncs\\\": {\\\"use_juncs\\\": \\\"No\\\", \\\"__current_case__\\\": 0}}, \\\"seg_mismatches\\\": \\\"2\\\", \\\"initial_read_mismatches\\\": \\\"2\\\", \\\"max_segment_intron\\\": \\\"500000\\\"}, \\\"__current_case__\\\": 1, \\\"mate_inner_distance\\\": \\\"150\\\"}\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCh37_canon.len\\\"\", \"__rerun_remap_job_id__\": null, \"refGenomeSource\": \"{\\\"genomeSource\\\": \\\"history\\\", \\\"ownFile\\\": null, \\\"__current_case__\\\": 1}\"}", 
+            "tool_version": "1.5.0", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "7": {
+            "annotation": "", 
+            "id": 7, 
+            "input_connections": {
+                "guided_junctions": {
+                    "id": 6, 
+                    "output_name": "junctions"
+                }, 
+                "input_bed": {
+                    "id": 5, 
+                    "output_name": "junctions"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Filter BED on splice junctions", 
+            "outputs": [
+                {
+                    "name": "novel_junctions", 
+                    "type": "bed"
+                }
+            ], 
+            "position": {
+                "left": 1300, 
+                "top": 453
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionnovel_junctions": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "novel_junctions"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/filter_bed_on_splice_junctions/filter_bed_on_splice_junctions/0.0.1", 
+            "tool_state": "{\"__page__\": 0, \"input_bed\": \"null\", \"__rerun_remap_job_id__\": null, \"leading_bp\": \"\\\"66\\\"\", \"guided_junctions\": \"null\", \"trailing_bp\": \"\\\"66\\\"\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCh37_canon.len\\\"\"}", 
+            "tool_version": "0.0.1", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "8": {
+            "annotation": "", 
+            "id": 8, 
+            "input_connections": {
+                "input": {
+                    "id": 7, 
+                    "output_name": "novel_junctions"
+                }, 
+                "seq_source|ref_file": {
+                    "id": 2, 
+                    "output_name": "output"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Extract Genomic DNA", 
+            "outputs": [
+                {
+                    "name": "out_file1", 
+                    "type": "input"
+                }
+            ], 
+            "position": {
+                "left": 1499, 
+                "top": 668
+            }, 
+            "post_job_actions": {}, 
+            "tool_errors": null, 
+            "tool_id": "Extract genomic DNA 1", 
+            "tool_state": "{\"out_format\": \"\\\"interval\\\"\", \"__page__\": 0, \"interpret_features\": \"\\\"no\\\"\", \"__rerun_remap_job_id__\": null, \"seq_source\": \"{\\\"index_source\\\": \\\"history\\\", \\\"ref_file\\\": null, \\\"__current_case__\\\": 1}\", \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCh37_canon.len\\\"\"}", 
+            "tool_version": "2.2.3", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "9": {
+            "annotation": "", 
+            "id": 9, 
+            "input_connections": {
+                "input": {
+                    "id": 8, 
+                    "output_name": "out_file1"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Translate BED Sequences", 
+            "outputs": [
+                {
+                    "name": "output", 
+                    "type": "fasta"
+                }
+            ], 
+            "position": {
+                "left": 1763, 
+                "top": 581
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionoutput": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "output"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/translate_bed_sequences/translate_bed_sequences/0.0.1", 
+            "tool_state": "{\"trim\": \"{\\\"trimseqs\\\": \\\"yes\\\", \\\"__current_case__\\\": 1}\", \"min_length\": \"\\\"10\\\"\", \"reference\": \"\\\"\\\"\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"filter\": \"{\\\"filterseqs\\\": \\\"yes\\\", \\\"trailing_bp\\\": \\\"66\\\", \\\"leading_bp\\\": \\\"66\\\", \\\"__current_case__\\\": 0}\", \"score_name\": \"\\\"depth\\\"\", \"input\": \"null\", \"seqtype\": \"\\\"pep:splice\\\"\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCh37_canon.len\\\"\"}", 
+            "tool_version": "0.0.1", 
+            "type": "tool", 
+            "user_outputs": []
+        }
+    }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Mon Mar 17 09:19:07 2014 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<repositories description="Required tools for proteomics_rnaseq_splice_db_workflow">
+    <repository name="tophat" owner="devteam" />
+    <repository name="filter_bed_on_splice_junctions" owner="jjohnson" />
+    <repository name="translate_bed_sequences" owner="jjohnson" />
+</repositories>