Galaxy |

Changeset 2:04e170c09922 (2013-04-12)

Previous changeset 1:6e49aa2461bd (2013-03-28) Next changeset 3:0b3a14a7b5f1 (2013-04-12)

Commit message:
Add a workflow for normal tissue samples.

modified:
Galaxy-Workflow-MMuFF_v1.2.ga

added:
Galaxy-Workflow-MMuFF_Human_germline_v1.2.ga
README

diff -r 6e49aa2461bd -r 04e170c09922 Galaxy-Workflow-MMuFF_Human_germline_v1.2.ga
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Galaxy-Workflow-MMuFF_Human_germline_v1.2.ga Fri Apr 12 07:51:45 2013 -0500

[

b'@@ -0,0 +1,604 @@\n+{\n+ "a_galaxy_workflow": "true", \n+ "annotation": "", \n+ "format-version": "0.1", \n+ "name": "MMuFF_Human_germline_v1.2", \n+ "steps": {\n+ "0": {\n+ "annotation": "Human RNA-seq paired reads left mates", \n+ "id": 0, \n+ "input_connections": {}, \n+ "inputs": [\n+ {\n+ "description": "Human RNA-seq paired reads left mates", \n+ "name": "Forward reads fastq"\n+ }\n+ ], \n+ "name": "Input dataset", \n+ "outputs": [], \n+ "position": {\n+ "left": 240, \n+ "top": 287\n+ }, \n+ "tool_errors": null, \n+ "tool_id": null, \n+ "tool_state": "{\\"name\\": \\"Forward reads fastq\\"}", \n+ "tool_version": null, \n+ "type": "data_input", \n+ "user_outputs": []\n+ }, \n+ "1": {\n+ "annotation": "Human RNA-seq paired reads right mates", \n+ "id": 1, \n+ "input_connections": {}, \n+ "inputs": [\n+ {\n+ "description": "Human RNA-seq paired reads right mates", \n+ "name": "Reverse reads fastq"\n+ }\n+ ], \n+ "name": "Input dataset", \n+ "outputs": [], \n+ "position": {\n+ "left": 249, \n+ "top": 383\n+ }, \n+ "tool_errors": null, \n+ "tool_id": null, \n+ "tool_state": "{\\"name\\": \\"Reverse reads fastq\\"}", \n+ "tool_version": null, \n+ "type": "data_input", \n+ "user_outputs": []\n+ }, \n+ "2": {\n+ "annotation": "Known SNPs - dbSNP.vcf", \n+ "id": 2, \n+ "input_connections": {}, \n+ "inputs": [\n+ {\n+ "description": "Known SNPs - dbSNP.vcf", \n+ "name": "VCF file of known variants (dbSNP)"\n+ }\n+ ], \n+ "name": "Input dataset", \n+ "outputs": [], \n+ "position": {\n+ "left": 2327, \n+ "top": 758\n+ }, \n+ "tool_errors": null, \n+ "tool_id": null, \n+ "tool_state": "{\\"name\\": \\"VCF file of known variants (dbSNP)\\"}", \n+ "tool_version": null, \n+ "type": "data_input", \n+ "user_outputs": []\n+ }, \n+ "3": {\n+ "annotation": "Convert quality scores to sanger scale", \n+ "id": 3, \n+ "input_connections": {\n+ "input_file": {\n+ "id": 0, \n+ "output_name": "output"\n+ }\n+ }, \n+ "inputs": [\n+ {\n+ "description": "runtime parameter for tool FASTQ 
Groomer", \n+ "name": "input_type"\n+ }\n+ ], \n+ "name": "FASTQ Groomer", \n+ "outputs": [\n+ {\n+ "name": "output_file", \n+ "type": "fastqsanger"\n+ }\n+ ], \n+ "position": {\n+ "left": 510, \n+ "top": 264\n+ }, \n+ "post_job_actions": {}, \n+ "tool_errors": null, \n+ "tool_id": "fastq_groomer", \n+ "tool_state": "{\\"__page__\\": 0, \\"input_type\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"RuntimeValue\\\\\\"}\\", \\"chromInfo\\": \\"\\\\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/?.len\\\\\\"\\", \\"options_type\\": \\"{\\\\\\"options_type_selector\\\\\\": \\\\\\"basic\\\\\\", \\\\\\"__current_case__\\\\\\": 0}\\", \\"input_file\\": \\"null\\"}", \n+ "tool_version": "1.0.4", \n+ "type": "tool", \n+ "user_outputs": []\n+ }, \n+ "4": {\n+ "annotation": "Convert quality scores to sanger scale", \n+ "id": 4, \n+ '..b' }\n+ }, \n+ "inputs": [], \n+ "name": "MPileup", \n+ "outputs": [\n+ {\n+ "name": "output_mpileup", \n+ "type": "pileup"\n+ }, \n+ {\n+ "name": "output_log", \n+ "type": "txt"\n+ }\n+ ], \n+ "position": {\n+ "left": 3554, \n+ "top": 265\n+ }, \n+ "post_job_actions": {\n+ "HideDatasetActionoutput_log": {\n+ "action_arguments": {}, \n+ "action_type": "HideDatasetAction", \n+ "output_name": "output_log"\n+ }, \n+ "HideDatasetActionoutput_mpileup": {\n+ "action_arguments": {}, \n+ "action_type": "HideDatasetAction", \n+ "output_name": "output_mpileup"\n+ }\n+ }, \n+ "tool_errors": null, \n+ "tool_id": "samtools_mpileup", \n+ "tool_state": "{\\"__page__\\": 0, \\"advanced_options\\": \\"{\\\\\\"max_reads_per_bam\\\\\\": \\\\\\"2500\\\\\\", \\\\\\"advanced_options_selector\\\\\\": \\\\\\"advanced\\\\\\", \\\\\\"extended_BAQ_computation\\\\\\": \\\\\\"False\\\\\\", \\\\\\"region_string\\\\\\": \\\\\\"\\\\\\", \\\\\\"output_per_sample_strand_bias_p_value\\\\\\": \\\\\\"False\\\\\\", \\\\\\"minimum_base_quality\\\\\\": \\\\\\"20\\\\\\", \\\\\\"disable_probabilistic_realignment\\\\\\": \\\\\\"True\\\\\\", 
\\\\\\"skip_anomalous_read_pairs\\\\\\": \\\\\\"True\\\\\\", \\\\\\"minimum_mapping_quality\\\\\\": \\\\\\"0\\\\\\", \\\\\\"output_per_sample_read_depth\\\\\\": \\\\\\"False\\\\\\", \\\\\\"__current_case__\\\\\\": 0, \\\\\\"position_list\\\\\\": null, \\\\\\"coefficient_for_downgrading\\\\\\": \\\\\\"0\\\\\\"}\\", \\"chromInfo\\": \\"\\\\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/hg19_canonical.len\\\\\\"\\", \\"genotype_likelihood_computation_type\\": \\"{\\\\\\"genotype_likelihood_computation_type_selector\\\\\\": \\\\\\"do_not_perform_genotype_likelihood_computation\\\\\\", \\\\\\"__current_case__\\\\\\": 1}\\", \\"reference_source\\": \\"{\\\\\\"ref_file\\\\\\": \\\\\\"hg19_canonical\\\\\\", \\\\\\"reference_source_selector\\\\\\": \\\\\\"cached\\\\\\", \\\\\\"input_bams\\\\\\": [{\\\\\\"__index__\\\\\\": 0, \\\\\\"input_bam\\\\\\": null}], \\\\\\"__current_case__\\\\\\": 0}\\"}", \n+ "tool_version": "0.0.1", \n+ "type": "tool", \n+ "user_outputs": []\n+ }, \n+ "16": {\n+ "annotation": "Minimum Base Quality: 30\\nMinimum Coverage Depth: 5\\nMinimum Frequency of a Specific Allele: .33\\nVCF_ID = germline", \n+ "id": 16, \n+ "input_connections": {\n+ "input_file": {\n+ "id": 15, \n+ "output_name": "output_mpileup"\n+ }\n+ }, \n+ "inputs": [\n+ {\n+ "description": "runtime parameter for tool Pileup to VCF", \n+ "name": "min_base_qual"\n+ }\n+ ], \n+ "name": "Pileup to VCF", \n+ "outputs": [\n+ {\n+ "name": "output_file", \n+ "type": "vcf"\n+ }\n+ ], \n+ "position": {\n+ "left": 3937, \n+ "top": 258\n+ }, \n+ "post_job_actions": {}, \n+ "tool_errors": null, \n+ "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/pileup_to_vcf/pileup_to_vcf/2.0", \n+ "tool_state": "{\\"snps_only\\": \\"\\\\\\"False\\\\\\"\\", \\"min_cvrg\\": \\"\\\\\\"5\\\\\\"\\", \\"allow_multiples\\": \\"\\\\\\"True\\\\\\"\\", \\"input_file\\": \\"null\\", \\"__page__\\": 0, \\"vcf_id\\": \\"\\\\\\"germline\\\\\\"\\", \\"cols\\": \\"{\\\\\\"select_order\\\\\\": 
\\\\\\"no\\\\\\", \\\\\\"__current_case__\\\\\\": 0}\\", \\"depth_as\\": \\"\\\\\\"ref\\\\\\"\\", \\"min_base_qual\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"RuntimeValue\\\\\\"}\\", \\"min_var_pct\\": \\"\\\\\\"0.33\\\\\\"\\"}", \n+ "tool_version": "2.2", \n+ "type": "tool", \n+ "user_outputs": []\n+ }\n+ }\n+}\n\\ No newline at end of file\n'

diff -r 6e49aa2461bd -r 04e170c09922 Galaxy-Workflow-MMuFF_v1.2.ga
--- a/Galaxy-Workflow-MMuFF_v1.2.ga Thu Mar 28 15:06:33 2013 -0500
+++ b/Galaxy-Workflow-MMuFF_v1.2.ga Fri Apr 12 07:51:45 2013 -0500

[

b'@@ -2,23 +2,23 @@\n "a_galaxy_workflow": "true", \n "annotation": "", \n "format-version": "0.1", \n- "name": "MMuFF_v1.2", \n+ "name": "MMuFF_Human_v1.2", \n "steps": {\n "0": {\n- "annotation": "", \n+ "annotation": "Human RNA-seq paired reads left mates", \n "id": 0, \n "input_connections": {}, \n "inputs": [\n {\n- "description": "", \n+ "description": "Human RNA-seq paired reads left mates", \n "name": "Forward reads fastq"\n }\n ], \n "name": "Input dataset", \n "outputs": [], \n "position": {\n- "left": 244, \n- "top": 313\n+ "left": 240, \n+ "top": 287\n }, \n "tool_errors": null, \n "tool_id": null, \n@@ -28,20 +28,20 @@\n "user_outputs": []\n }, \n "1": {\n- "annotation": "", \n+ "annotation": "Human RNA-seq paired reads right mates", \n "id": 1, \n "input_connections": {}, \n "inputs": [\n {\n- "description": "", \n+ "description": "Human RNA-seq paired reads right mates", \n "name": "Reverse reads fastq"\n }\n ], \n "name": "Input dataset", \n "outputs": [], \n "position": {\n- "left": 242, \n- "top": 410\n+ "left": 249, \n+ "top": 383\n }, \n "tool_errors": null, \n "tool_id": null, \n@@ -51,20 +51,20 @@\n "user_outputs": []\n }, \n "2": {\n- "annotation": "dbSNP.vcf", \n+ "annotation": "Known SNPs - dbSNP.vcf", \n "id": 2, \n "input_connections": {}, \n "inputs": [\n {\n- "description": "dbSNP.vcf", \n+ "description": "Known SNPs - dbSNP.vcf", \n "name": "VCF file of known variants (dbSNP)"\n }\n ], \n "name": "Input dataset", \n "outputs": [], \n "position": {\n- "left": 4063, \n- "top": 532\n+ "left": 2327, \n+ "top": 758\n }, \n "tool_errors": null, \n "tool_id": null, \n@@ -74,12 +74,12 @@\n "user_outputs": []\n }, \n "3": {\n- "annotation": "These are locations other than those in dbSNP. The ID will be used to annotate the Variants dataset, and then can be filtered out. 
There needs to be at least one entry for each chromosome: (M,1,2,...,X,Y) even if it is a bogus value at position 1", \n+ "annotation": "These are locations other than those in dbSNP. This could include variants from the subject normal tissue. \\nThe ID will be used to annotate the Variants dataset, and then can be filtered out. There needs to be at least one entry for each chromosome: (M,1,2,...,X,Y)", \n "id": 3, \n "input_connections": {}, \n "inputs": [\n {\n- "description": "These are locations other than those in dbSNP. The ID will be used to annotate the Variants dataset, and then can be filtered out. There needs to be at least one entry for each chromosome: (M,1,2,...,X,Y) even if it is a bogus value at position 1", \n+ "description": "These are locations other than those in dbSNP. This could include variants from the subject normal tissue. \\nThe ID will be used to annotate the Variants dataset, and then can be filtered out. There needs to be at least one entry for each chromosome: (M,1,2,...,X,Y)", \n "name": "VCF file of additional known variants "\n }\n ], \n@@ -87,7 +87,7 @@\n '..b'ame": "SnpEff", \n "outputs": [\n {\n@@ -779,7 +753,7 @@\n ], \n "position": {\n "left": 5277, \n- "top": 466\n+ "top": 442\n }, \n "post_job_actions": {\n "HideDatasetActionstatsFile": {\n@@ -790,7 +764,7 @@\n }, \n "tool_errors": null, \n "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/snpeff/snpEff/3.1", \n- "tool_state": "{\\"filterHomHet\\": \\"\\\\\\"\\\\\\"\\", \\"outputFormat\\": \\"\\\\\\"vcf\\\\\\"\\", \\"filterOut\\": \\"[\\\\\\"downstream\\\\\\", \\\\\\"intergenic\\\\\\", \\\\\\"intron\\\\\\", \\\\\\"upstream\\\\\\", \\\\\\"utr\\\\\\"]\\", \\"inputFormat\\": \\"\\\\\\"vcf\\\\\\"\\", \\"filterIn\\": \\"\\\\\\"\\\\\\"\\", \\"__page__\\": 0, \\"udLength\\": \\"\\\\\\"0\\\\\\"\\", \\"genomeVersion\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"RuntimeValue\\\\\\"}\\", \\"offset\\": \\"\\\\\\"\\\\\\"\\", \\"input\\": \\"null\\"}", \n+ "tool_state": "{\\"filterHomHet\\": 
\\"\\\\\\"\\\\\\"\\", \\"outputFormat\\": \\"\\\\\\"vcf\\\\\\"\\", \\"filterOut\\": \\"[\\\\\\"downstream\\\\\\", \\\\\\"intergenic\\\\\\", \\\\\\"intron\\\\\\", \\\\\\"upstream\\\\\\", \\\\\\"utr\\\\\\"]\\", \\"inputFormat\\": \\"\\\\\\"vcf\\\\\\"\\", \\"filterIn\\": \\"\\\\\\"\\\\\\"\\", \\"__page__\\": 0, \\"udLength\\": \\"\\\\\\"0\\\\\\"\\", \\"genomeVersion\\": \\"\\\\\\"GRCh37.68\\\\\\"\\", \\"offset\\": \\"\\\\\\"\\\\\\"\\", \\"input\\": \\"null\\"}", \n "tool_version": "3.1", \n "type": "tool", \n "user_outputs": []\n@@ -814,7 +788,7 @@\n ], \n "position": {\n "left": 5595, \n- "top": 429\n+ "top": 405\n }, \n "post_job_actions": {}, \n "tool_errors": null, \n@@ -842,8 +816,8 @@\n }\n ], \n "position": {\n- "left": 5595, \n- "top": 575\n+ "left": 5600, \n+ "top": 584\n }, \n "post_job_actions": {}, \n "tool_errors": null, \n@@ -879,8 +853,8 @@\n }\n ], \n "position": {\n- "left": 5832, \n- "top": 395\n+ "left": 5810, \n+ "top": 364\n }, \n "post_job_actions": {\n "HideDatasetActionhtml_report": {\n@@ -900,7 +874,7 @@\n }\n }, \n "tool_errors": null, \n- "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/snpeff_cds_report/SnpEff-cds-report/1.0", \n+ "tool_id": "SnpEff-cds-report", \n "tool_state": "{\\"ensembl_host\\": \\"\\\\\\"feb2012\\\\\\"\\", \\"report_format\\": \\"[\\\\\\"html\\\\\\", \\\\\\"tsv\\\\\\"]\\", \\"ensembl_dataset\\": \\"\\\\\\"hsapiens_gene_ensembl\\\\\\"\\", \\"__page__\\": 0, \\"effects_filter\\": \\"\\\\\\"FRAME_SHIFT\\\\\\"\\", \\"all_effects\\": \\"\\\\\\"False\\\\\\"\\", \\"polya\\": \\"\\\\\\"5\\\\\\"\\", \\"snp_effect_vcf\\": \\"null\\", \\"with_ccds\\": \\"\\\\\\"False\\\\\\"\\"}", \n "tool_version": "1.0", \n "type": "tool", \n@@ -932,8 +906,8 @@\n }\n ], \n "position": {\n- "left": 5838, \n- "top": 549\n+ "left": 5820, \n+ "top": 537\n }, \n "post_job_actions": {\n "HideDatasetActionhtml_report": {\n@@ -953,11 +927,11 @@\n }\n }, \n "tool_errors": null, \n- "tool_id": 
"toolshed.g2.bx.psu.edu/repos/jjohnson/snpeff_cds_report/SnpEff-cds-report/1.0", \n+ "tool_id": "SnpEff-cds-report", \n "tool_state": "{\\"ensembl_host\\": \\"\\\\\\"feb2012\\\\\\"\\", \\"report_format\\": \\"[\\\\\\"html\\\\\\", \\\\\\"tsv\\\\\\"]\\", \\"ensembl_dataset\\": \\"\\\\\\"hsapiens_gene_ensembl\\\\\\"\\", \\"__page__\\": 0, \\"effects_filter\\": \\"\\\\\\"NON_SYNONYMOUS_CODING\\\\\\"\\", \\"all_effects\\": \\"\\\\\\"False\\\\\\"\\", \\"polya\\": \\"\\\\\\"5\\\\\\"\\", \\"snp_effect_vcf\\": \\"null\\", \\"with_ccds\\": \\"\\\\\\"False\\\\\\"\\"}", \n "tool_version": "1.0", \n "type": "tool", \n "user_outputs": []\n }\n }\n-}\n+}\n\\ No newline at end of file\n'

diff -r 6e49aa2461bd -r 04e170c09922 README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README Fri Apr 12 07:51:45 2013 -0500

@@ -0,0 +1,21 @@
+MMuFF: Missense Mutation and Frameshift Finder
+analyzes Next Generation Sequencing (NGS) paired-read RNA-seq output to reliably identify small frameshift mutations, as well as missense mutations, in highly expressed protein-coding genes. MMuFF ignores known SNPs, low quality reads, and poly-A/T sequences. For each frameshift and missense mutation identified, MMuFF provides the location and sequence of the amino acid substitutions in the novel protein candidates for direct input into epitope evaluation tools.
+
+The parameter settings in the workflows are set for human samples.
+
+To execute MMuFF, create a Galaxy history and upload the four input files:
+  1. tumor sample forward reads fastq
+  2. tumor sample reverse reads fastq
+  3. dbSNP VCF file
+  4. additional exclusions VCF
+
+Select Galaxy-Workflow-MMuFF_v1.2.ga to Run
+Set input files for Galaxy-Workflow-MMuFF_v1.2.ga
+
+If you have reads from matched tumor/normal tissue samples,
+run the Galaxy-Workflow-MMuFF_Human_germline_v1.2.ga on the normal samples with inputs:
+  1. normal sample forward reads fastq
+  2. normal sample reverse reads fastq
+  3. dbSNP VCF file
+and use the final VCF as input 4 "additional exclusions VCF" in the Galaxy-Workflow-MMuFF_v1.2.ga workflow.
+