Mercurial > repos > ryanmorin > nextgen_variant_identification
changeset 6:361d6506850a
Uploaded
author | ryanmorin |
---|---|
date | Tue, 18 Oct 2011 18:32:07 -0400 |
parents | a4975ec34575 |
children | 351b3acadd17 |
files | Galaxy-Workflow-Genome_Exome_paired_analysis_(SNVMix1).ga SNV/README SNV/filter_snvmix_somatic.py SNV/snp_filters.py SNV/tool-data/tool_conf.xml.sample sam_fa_indices.loc.sample tool_conf.xml.sample |
diffstat | 7 files changed, 18 insertions(+), 492 deletions(-) [+] |
line wrap: on
line diff
--- a/Galaxy-Workflow-Genome_Exome_paired_analysis_(SNVMix1).ga Mon Oct 17 14:57:09 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,450 +0,0 @@ -{ - "a_galaxy_workflow": "true", - "annotation": "", - "format-version": "0.1", - "name": "Genome/Exome paired analysis (SNVMix1)", - "steps": { - "0": { - "annotation": "", - "id": 0, - "input_connections": {}, - "inputs": [ - { - "description": "", - "name": "Tumour bam file (rmdup or dups flagged)" - } - ], - "name": "Input dataset", - "outputs": [], - "position": { - "left": 205, - "top": 458 - }, - "tool_errors": null, - "tool_id": null, - "tool_state": "{\"name\": \"Tumour bam file (rmdup or dups flagged)\"}", - "tool_version": null, - "type": "data_input", - "user_outputs": [] - }, - "1": { - "annotation": "", - "id": 1, - "input_connections": {}, - "inputs": [ - { - "description": "", - "name": "Codon resource (codon_lookup_new.sort)" - } - ], - "name": "Input dataset", - "outputs": [], - "position": { - "left": 205, - "top": 524 - }, - "tool_errors": null, - "tool_id": null, - "tool_state": "{\"name\": \"Codon resource (codon_lookup_new.sort)\"}", - "tool_version": null, - "type": "data_input", - "user_outputs": [] - }, - "2": { - "annotation": "", - "id": 2, - "input_connections": {}, - "inputs": [ - { - "description": "", - "name": "SNP resource (all_known_snps.txt.current)" - } - ], - "name": "Input dataset", - "outputs": [], - "position": { - "left": 204, - "top": 588 - }, - "tool_errors": null, - "tool_id": null, - "tool_state": "{\"name\": \"SNP resource (all_known_snps.txt.current)\"}", - "tool_version": null, - "type": "data_input", - "user_outputs": [] - }, - "3": { - "annotation": "", - "id": 3, - "input_connections": {}, - "inputs": [ - { - "description": "", - "name": "Normal bam file (rmdup or dups flagged)" - } - ], - "name": "Input dataset", - "outputs": [], - "position": { - "left": 205, - "top": 652 - }, - "tool_errors": null, - "tool_id": null, - "tool_state": "{\"name\": \"Normal bam file (rmdup or dups flagged)\"}", - "tool_version": null, - "type": "data_input", - "user_outputs": [] - }, - "4": { - "annotation": "", - "id": 4, - "input_connections": { - "refOrHistory|input1": { - "id": 0, - "output_name": "output" - } - }, - "inputs": [], - "name": "SNVMix", - "outputs": [ - { - "name": "output_snvmix", - "type": "tabular" - } - ], - "position": { - "left": 265.5, - "top": 333.5 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "snvmix", - "tool_state": "{\"q\": \"\\\"19\\\"\", \"positionFile\": \"{\\\"positions\\\": \\\"no\\\", \\\"__current_case__\\\": 1}\", \"__page__\": 0, \"refOrHistory\": \"{\\\"input1\\\": null, \\\"reference\\\": \\\"indexed\\\", \\\"__current_case__\\\": 0}\", \"keep_dups\": \"\\\"no\\\"\", \"Q\": \"\\\"19\\\"\", \"full\": \"\\\"no\\\"\", \"keep_chastity\": \"\\\"no\\\"\", \"type\": \"\\\"SNVMix1\\\"\"}", - "tool_version": "0.12.1-rc1", - "type": "tool", - "user_outputs": [] - }, - "5": { - "annotation": "", - "id": 5, - "input_connections": { - "input1": { - "id": 4, - "output_name": "output_snvmix" - } - }, - "inputs": [], - "name": "SNVMix filter", - "outputs": [ - { - "name": "output1", - "type": "tabular" - } - ], - "position": { - "left": 554, - "top": 268 - }, - "post_job_actions": { - "RenameDatasetActionoutput1": { - "action_arguments": { - "newname": "Filtered SNVMix output" - }, - "action_type": "RenameDatasetAction", - "output_name": "output1" - } - }, - "tool_errors": null, - "tool_id": "filter_snvmix", - "tool_state": "{\"__page__\": 0, \"require_dual_strand\": \"\\\"yes\\\"\", \"input1\": \"null\", \"max_indels\": \"\\\"1\\\"\"}", - "tool_version": "1.0.0", - "type": "tool", - "user_outputs": [] - }, - "6": { - "annotation": "", - "id": 6, - "input_connections": { - "codon_resource": { - "id": 1, - "output_name": "output" - }, - "input1": { - "id": 5, - "output_name": "output1" - }, - "known_snp_resource": { - "id": 2, - "output_name": "output" - } - }, - "inputs": [], - "name": "SNP filtering and pre-annotation", - "outputs": [ - { - "name": "output1", - "type": "tabular" - }, - { - "name": "output2", - "type": "tabular" - } - ], - "position": { - "left": 539.5, - "top": 467 - }, - "post_job_actions": { - "RenameDatasetActionoutput2": { - "action_arguments": { - "newname": "codon-linked novel SNVs" - }, - "action_type": "RenameDatasetAction", - "output_name": "output2" - } - }, - "tool_errors": null, - "tool_id": "snp_filters", - "tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"known_snp_resource\": \"null\", \"codon_resource\": \"null\"}", - "tool_version": "1.0.0", - "type": "tool", - "user_outputs": [] - }, - "7": { - "annotation": "", - "id": 7, - "input_connections": { - "input_codon": { - "id": 6, - "output_name": "output2" - } - }, - "inputs": [], - "name": "SNV Annotator", - "outputs": [ - { - "name": "output_anno", - "type": "tabular" - } - ], - "position": { - "left": 891, - "top": 223 - }, - "post_job_actions": { - "RenameDatasetActionoutput_anno": { - "action_arguments": { - "newname": "Annotated novel SNVs from tumour" - }, - "action_type": "RenameDatasetAction", - "output_name": "output_anno" - } - }, - "tool_errors": null, - "tool_id": "snv_annotate", - "tool_state": "{\"__page__\": 0, \"input_codon\": \"null\"}", - "tool_version": "1.0.0", - "type": "tool", - "user_outputs": [] - }, - "8": { - "annotation": "", - "id": 8, - "input_connections": { - "input": { - "id": 7, - "output_name": "output_anno" - } - }, - "inputs": [], - "name": "Select", - "outputs": [ - { - "name": "out_file1", - "type": "input" - } - ], - "position": { - "left": 896, - "top": 330 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "Grep1", - "tool_state": "{\"__page__\": 0, \"input\": \"null\", \"invert\": \"\\\"false\\\"\", \"pattern\": \"\\\"CODING\\\"\"}", - "tool_version": "1.0.1", - "type": "tool", - "user_outputs": [] - }, - "9": { - "annotation": "", - "id": 9, - "input_connections": { - "input": { - "id": 8, - "output_name": "out_file1" - } - }, - "inputs": [], - "name": "Cut", - "outputs": [ - { - "name": "out_file1", - "type": "tabular" - } - ], - "position": { - "left": 899, - "top": 420 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "Cut1", - "tool_state": "{\"columnList\": \"\\\"c1\\\"\", \"input\": \"null\", \"delimiter\": \"\\\"Sp\\\"\", \"__page__\": 0}", - "tool_version": "1.0.1", - "type": "tool", - "user_outputs": [] - }, - "10": { - "annotation": "", - "id": 10, - "input_connections": { - "input": { - "id": 9, - "output_name": "out_file1" - } - }, - "inputs": [], - "name": "Convert", - "outputs": [ - { - "name": "out_file1", - "type": "tabular" - } - ], - "position": { - "left": 903, - "top": 513 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "Convert characters1", - "tool_state": "{\"input\": \"null\", \"convert_from\": \"\\\"Co\\\"\", \"__page__\": 0}", - "tool_version": "1.0.0", - "type": "tool", - "user_outputs": [] - }, - "11": { - "annotation": "", - "id": 11, - "input_connections": { - "input1": { - "id": 10, - "output_name": "out_file1" - } - }, - "inputs": [], - "name": "Trim", - "outputs": [ - { - "name": "out_file1", - "type": "input" - } - ], - "position": { - "left": 904, - "top": 607 - }, - "post_job_actions": { - "RenameDatasetActionout_file1": { - "action_arguments": { - "newname": "" - }, - "action_type": "RenameDatasetAction", - "output_name": "out_file1" - } - }, - "tool_errors": null, - "tool_id": "trimmer", - "tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"end\": \"\\\"0\\\"\", \"fastq\": \"\\\"\\\"\", \"ignore\": \"null\", \"start\": \"\\\"4\\\"\", \"col\": \"\\\"0\\\"\"}", - "tool_version": "0.0.1", - "type": "tool", - "user_outputs": [] - }, - "12": { - "annotation": "", - "id": 12, - "input_connections": { - "pos": { - "id": 11, - "output_name": "out_file1" - }, - "refOrHistory|input1": { - "id": 3, - "output_name": "output" - } - }, - "inputs": [], - "name": "SNVMix at selected positions", - "outputs": [ - { - "name": "output_snvmix", - "type": "tabular" - } - ], - "position": { - "left": 847.5, - "top": 711 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "snvmix2", - "tool_state": "{\"q\": \"\\\"19\\\"\", \"full\": \"\\\"yes\\\"\", \"__page__\": 0, \"refOrHistory\": \"{\\\"input1\\\": null, \\\"reference\\\": \\\"indexed\\\", \\\"__current_case__\\\": 0}\", \"keep_dups\": \"\\\"no\\\"\", \"pos\": \"null\", \"Q\": \"\\\"19\\\"\", \"keep_chastity\": \"\\\"no\\\"\", \"type\": \"\\\"MB\\\"\"}", - "tool_version": "0.12.1-rc1", - "type": "tool", - "user_outputs": [] - }, - "13": { - "annotation": "", - "id": 13, - "input_connections": { - "input1": { - "id": 12, - "output_name": "output_snvmix" - }, - "input2": { - "id": 7, - "output_name": "output_anno" - } - }, - "inputs": [], - "name": "Get somatic positions from germline SNVMix output", - "outputs": [ - { - "name": "output1", - "type": "tabular" - } - ], - "position": { - "left": 1129, - "top": 474.5 - }, - "post_job_actions": { - "RenameDatasetActionoutput1": { - "action_arguments": { - "newname": "Final somatic calls (SNVMix1)" - }, - "action_type": "RenameDatasetAction", - "output_name": "output1" - } - }, - "tool_errors": null, - "tool_id": "snvmix_somatic_filter", - "tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"nonref_support\": \"\\\"2\\\"\", \"input1\": \"null\", \"posterior\": \"\\\"0.999\\\"\"}", - "tool_version": "0.12.1-rc1", - "type": "tool", - "user_outputs": [] - } - } -} \ No newline at end of file
--- a/SNV/README Mon Oct 17 14:57:09 2011 -0400 +++ b/SNV/README Tue Oct 18 18:32:07 2011 -0400 @@ -3,8 +3,11 @@ Installation ------------ -1) Place these files in $GALAXY_HOME/tools -2) Modify your configuration files appropriately, for example, add the tools to $GALAXY_HOME/tool_conf.xml (under the NGS analysis section, create a "variant calling" section) +1) Place these files in $GALAXY_HOME/tools and compile/install SNVMix2 if you haven't already done so (also copy/link the SNVMix2 binary, +identify_nonsynonymous_mutations.pl and filter_snvmix.pl to /usr/local/bin or some other location in the default PATH, also ensure they are executable) +2) Modify your configuration files appropriately +-add the tools to $GALAXY_HOME/tool_conf.xml (under the NGS analysis section, create a "variant calling" section), see the tool_conf.xml.sample for an example +-create or modify $GALAXY_HOME/sam_fa_indices.loc to match the example provided (points galaxy to the fasta files containing the genome that was used during the alignment step). These are needed by SNVMix. Requirements ------------
--- a/SNV/filter_snvmix_somatic.py Mon Oct 17 14:57:09 2011 -0400 +++ b/SNV/filter_snvmix_somatic.py Tue Oct 18 18:32:07 2011 -0400 @@ -17,7 +17,7 @@ import pkg_resources; pkg_resources.require( "bx-python" ) from bx.cookbook import doc_optparse import re - +os.environ['LC_COLLATE'] = 'C' def stop_err( msg ): sys.stderr.write( '%s\n' % msg ) sys.exit()
--- a/SNV/snp_filters.py Mon Oct 17 14:57:09 2011 -0400 +++ b/SNV/snp_filters.py Tue Oct 18 18:32:07 2011 -0400 @@ -19,6 +19,7 @@ from galaxy import eggs import pkg_resources; pkg_resources.require( "bx-python" ) from bx.cookbook import doc_optparse +os.environ['LC_COLLATE'] = 'C' def stop_err( msg ): sys.stderr.write( '%s\n' % msg )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNV/tool-data/tool_conf.xml.sample Tue Oct 18 18:32:07 2011 -0400 @@ -0,0 +1,11 @@ +#add the following section to your file to enable these tools + <section name="NGS: Variant Calling" id="SNV"> + <label text="SNV pipeline" id="SNV" /> + <tool file="SNV/snvmix.xml" /> + <tool file="SNV/snvmix2.xml" /> + <tool file="SNV/filter_snvmix.xml" /> + <tool file="SNV/snp_filters.xml" /> + <tool file="SNV/annotate.xml" /> + <tool file="SNV/filter_snvmix_somatic.xml" /> + </section> +
--- a/sam_fa_indices.loc.sample Mon Oct 17 14:57:09 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of Samtools indexed sequences data files. You will need -#to create these data files and then create a sam_fa_indices.loc file -#similar to this one (store it in this directory) that points to -#the directories in which those files are stored. The sam_fa_indices.loc -#file has this format (white space characters are TAB characters): -# -#index <seq> <location> -# -#So, for example, if you had hg18 indexed stored in -#/depot/data2/galaxy/sam/, -#then the sam_fa_indices.loc entry would look like this: -# -#index hg18 /depot/data2/galaxy/sam/hg18.fa -# -#and your /depot/data2/galaxy/sam/ directory -#would contain hg18.fa and hg18.fa.fai files: -# -#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa -#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai -# -#Your sam_fa_indices.loc file should include an entry per line for -#each index set you have stored. The file in the path does actually -#exist, but it should never be directly used. Instead, the name serves -#as a prefix for the index file. For example: -# -#index hg18 /depot/data2/galaxy/sam/hg18.fa -#index hg19 /depot/data2/galaxy/sam/hg19.fa
--- a/tool_conf.xml.sample Mon Oct 17 14:57:09 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -#add the following section to your file to enable these tools - <section name="NGS: Variant Calling" id="SNV"> - <label text="SNV pipeline" id="SNV" /> - <tool file="SNV/snvmix.xml" /> - <tool file="SNV/snvmix2.xml" /> - <tool file="SNV/filter_snvmix.xml" /> - <tool file="SNV/snp_filters.xml" /> - <tool file="SNV/annotate.xml" /> - <tool file="SNV/filter_snvmix_somatic.xml" /> - </section> -