Mercurial > repos > galaxyp > cravatool
changeset 3:a018c44dc18b draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatp_score_and_annotate commit d80e60ce74aabe64e131d560085af099d52b81cf-dirty
author | galaxyp |
---|---|
date | Fri, 07 Sep 2018 16:53:05 -0400 |
parents | f3027b8f28bd |
children | |
files | cravatp_submit.py cravatp_submit.xml test-data/error.tsv test-data/gene.tsv test-data/noncoding.tsv test-data/variant.tsv |
diffstat | 6 files changed, 79 insertions(+), 62 deletions(-) [+] |
line wrap: on
line diff
--- a/cravatp_submit.py Thu Aug 16 12:28:29 2018 -0400 +++ b/cravatp_submit.py Fri Sep 07 16:53:05 2018 -0400 @@ -24,15 +24,12 @@ # initializes blank parameters chasm_classifier = '' probed_filename = None -intersected_only = False +all_intersect = False vcf_output = None analysis_type = None # # Testing Command -# python cravatp_submit.py test-data/Freebayes_two-variants.vcf GRCh38 -# test-data/variant.tsv test-data/gene.tsv test-data/noncoding.tsv -# test-data/error.tsv CHASM -—classifier Breast -—proBED -# test-data/MCF7_proBed.bed +# python cravatp_submit.py test-data/Freebayes_two-variants.vcf GRCh38 test-data/variant.tsv test-data/gene.tsv test-data/noncoding.tsv test-data/error.tsv CHASM -—classifier Breast -—proBED test-data/MCF7_proBed.bed parser = argparse.ArgumentParser() parser.add_argument('cravatInput',help='The filename of the input ' 'CRAVAT-formatted tabular file ' @@ -56,11 +53,8 @@ parser.add_argument('--proBED', help='The filename of the proBED file ' 'containing peptides with genomic ' 'coordinates') -parser.add_argument('--intersectOnly', help='Specifies whether to ' - 'analyze only variants ' - 'intersected between the ' - 'CRAVAT input and proBED ' - 'file') +parser.add_argument('--allIntersect', help='Specifies whether to ' + 'analyze all variants') parser.add_argument('--vcfOutput', help='The output filename of the ' 'intersected VCF file') @@ -78,8 +72,8 @@ chasm_classifier = args.classifier if args.proBED: probed_filename = args.proBED -if args.intersectOnly: - intersected_only = args.intersectOnly +if args.allIntersect: + all_intersect = args.allIntersect if args.vcfOutput: vcf_output = args.vcfOutput @@ -118,7 +112,7 @@ # proteogenomic input (proBED) file if the user specifies that they want # to only include intersected variants or if they want to receive the # intersected VCF as well. 
-if probed_filename and (vcf_output or intersected_only == 'true'): +if probed_filename and (vcf_output or all_intersect == 'false'): proBED = loadProBED() if not vcf_output: vcf_output = 'intersected_input.vcf' @@ -143,7 +137,7 @@ genpos <= pepposB): tsvout.writerow(row) break -if intersected_only == 'true': +if all_intersect == 'false': input_filename = vcf_output # sets up the parameters for submission to the CRAVAT API
--- a/cravatp_submit.xml Thu Aug 16 12:28:29 2018 -0400 +++ b/cravatp_submit.xml Fri Sep 07 16:53:05 2018 -0400 @@ -1,12 +1,12 @@ -<tool id="cravatp_submit" name="CRAVAT-P Submit, Intersect, Check, and Retrieve" version="1.0.0"> +<tool id="cravatp_submit" name="CRAVAT-P Submit, Intersect, Check, and Retrieve" version="1.1.0"> <description>| Submits, intersects, checks for, and retrieves data for cancer annotation.</description> <command detect_errors="aggressive"> <![CDATA[ #if $proteo.proteoInput == 'yes': #if '$analysis.type' == 'CHASM' or '$analysis.type' == 'CHASM+VEST': - python '$__tool_directory__/cravatp_submit.py' '$input' '$GRCh' '$variant' '$gene' '$noncoding' '$error' '$analysis.type' --classifier '$analysis.classifier' --proBED '$proBED' --intersectOnly '$proteo.intersectedVariants' --vcfOutput '$vcf_output' + python '$__tool_directory__/cravatp_submit.py' '$input' '$GRCh' '$variant' '$gene' '$noncoding' '$error' '$analysis.type' --classifier '$analysis.classifier' --proBED '$proBED' --allIntersect '$proteo.intersectedVariants' --vcfOutput '$vcf_output' #else: - python '$__tool_directory__/cravatp_submit.py' '$input' '$GRCh' '$variant' '$gene' '$noncoding' '$error' '$analysis.type' --proBED '$proBED' --intersectOnly '$proteo.intersectedVariants' --vcfOutput '$vcf_output' + python '$__tool_directory__/cravatp_submit.py' '$input' '$GRCh' '$variant' '$gene' '$noncoding' '$error' '$analysis.type' --proBED '$proBED' --allIntersect '$proteo.intersectedVariants' --vcfOutput '$vcf_output' #end if #else: #if $analysis.type == 'CHASM' or $analysis.type == 'CHASM+VEST': @@ -20,20 +20,20 @@ <inputs> <param format="vcf" name="input" type="data" label="Source file" help="Accepts transcriptomic or genomic inputs (e.g., tabular, VCF). Additional details can be found below."></param> <conditional name="proteo"> - <param name="proteoInput" type="select" label="Intersect with proteogenomic input?" 
help="Source file (first input) must be in genomic input to enable intersection with this proteogenomic file."> + <param name="proteoInput" type="select" label="Intersect with proteogenomic input?" help="Source file (first input) must be in genomic input to enable intersection with this proteogenomic file. Only variants intersected between the genomic and proteogenomic files are annotated, unless specified otherwise below."> <option value="yes">Yes</option> <option value="no" selected="true">No</option> </param> <when value="yes"> <param format="BED" name="proBED" type="data" label="Peptides with Genomic Coordinates (ProBED Format)"></param> - <param name="intersectedVariants" type="boolean" checked="false" label="Submit only intersected variants?" help="Submits the intersected portion of the genomic file to CRAVAT's server. Restricting analysis to only intersected variants takes less time but also provides less-comprehensive results."></param> + <param name="intersectedVariants" type="boolean" checked="false" label="Submit all variants?" help="Submits all variants, including non-intersected variants, to the CRAVAT server. This results in a complete genomic analysis at the expense of a longer runtime."></param> <param name="output_vcf" type="boolean" checked="false" label="Output intersected genomic file?" 
help="The intersected genomic file (e.g., VCF) will be included as a result."></param> </when> <when value="no"> </when> </conditional> <conditional name="analysis"> - <param format="tabular" name="type" type="select" label="Analysis Program" help="VEST and CHASM are machine learning methods for predicting the pathogenicity and functional significance of variants, respectively."> + <param format="tabular" name="type" type="select" label="Analysis Program" help="CHASM and VEST are machine learning methods for predicting the functional significance and pathogenicity of variants, respectively."> <option value="None">None</option> <option value="CHASM">CHASM</option> <option value="VEST">VEST</option> @@ -110,8 +110,8 @@ </param> </inputs> <outputs> - <collection name="results" type="list" label="CRAVAT Results on ${on_string}"> - <data format="tabular" label="CRAVAT: Gene Level Annotation Report on ${on_string}" name="gene" /> + <collection name="results" type="list" label="CRAVAT-P Results on ${on_string}"> + <data format="tabular" label="CRAVAT: gene Level Annotation Report on ${on_string}" name="gene" /> <data format="tabular" label="CRAVAT: Variant Report on ${on_string}" name="variant" /> <data format="tabular" label="CRAVAT: Non-coding Variant Report on ${on_string}" name="noncoding" /> <data format="tabular" label="CRAVAT: Errors on ${on_string}" name="error" /> @@ -121,6 +121,28 @@ </data> </outputs> <tests> + <!-- Proteogenomic test case --> + <test> + <param name="input" value="Freebayes_one-variant.vcf"/> + <param name="GRCh" value="GRCh38"/> + <param name="variant" value="variant.tsv"/> + <param name="gene" value="gene.tsv"/> + <param name="noncoding" value="noncoding.tsv"/> + <param name="error" value="error.tsv"/> + <param name="type" value="CHASM" /> + <param name="classifier" value="Breast" /> + <param name="proteoInput" value="yes" /> + <param name="proBED" value="MCF7_proBed.bed"/> + <output_collection name="results" type="list"> + <element 
name="variant"> + <assert_contents> + <has_text text="hg38"/> + <has_text text="UPF1" /> + <not_has_text text="CRABP2"/> + </assert_contents> + </element> + </output_collection> + </test> <!-- GRCh38/hg38 and no analysis test case --> <test> <param name="input" value="Freebayes_one-variant.vcf"/> @@ -202,29 +224,7 @@ </element> </output_collection> </test> - <!-- Proteogenomic test case --> - <test> - <param name="input" value="Freebayes_one-variant.vcf"/> - <param name="GRCh" value="GRCh38"/> - <param name="variant" value="variant.tsv"/> - <param name="gene" value="gene.tsv"/> - <param name="noncoding" value="noncoding.tsv"/> - <param name="error" value="error.tsv"/> - <param name="type" value="CHASM" /> - <param name="classifier" value="Breast" /> - <param name="proteoInput" value="yes" /> - <param name="proBED" value="MCF7_proBed.bed"/> - <output_collection name="results" type="list"> - <element name="variant"> - <assert_contents> - <has_text text="#Variant Report" /> - <has_text text="hg38"/> - <has_text text="UPF1" /> - <has_text text="EAIDSPVSFLVLHNQIR" /> - </assert_contents> - </element> - </output_collection> - </test> + <!-- "Output intersected VCF" test case --> <test> <param name="input" value="Freebayes_one-variant.vcf"/> @@ -240,6 +240,29 @@ <param name="output_vcf" value="true"/> <output name="vcf_output" file="results/intersected_vcf.vcf"/> </test> + <!-- "All proteogenomic variants submitted" test case--> + <test> + <param name="input" value="Freebayes_two-variants.vcf"/> + <param name="GRCh" value="GRCh38"/> + <param name="variant" value="variant.tsv"/> + <param name="gene" value="gene.tsv"/> + <param name="noncoding" value="noncoding.tsv"/> + <param name="error" value="error.tsv"/> + <param name="type" value="CHASM" /> + <param name="classifier" value="Breast" /> + <param name="proteoInput" value="yes" /> + <param name="proBED" value="MCF7_proBed.bed"/> + <param name="intersectedVariants" value="true" /> + <output_collection name="results" 
type="list"> + <element name="variant"> + <assert_contents> + <has_text text="hg38"/> + <has_text text="UPF1" /> + <has_text text="CRABP2"/> + </assert_contents> + </element> + </output_collection> + </test> <!-- "Only intersected proteogenomic variants submitted" test case--> <test> <param name="input" value="Freebayes_two-variants.vcf"/> @@ -256,9 +279,10 @@ <output_collection name="results" type="list"> <element name="variant"> <assert_contents> + <has_text text="#Variant Report" /> <has_text text="hg38"/> <has_text text="UPF1" /> - <not_has_text text="CRABP2"/> + <has_text text="EAIDSPVSFLVLHNQIR" /> </assert_contents> </element> </output_collection> @@ -290,6 +314,7 @@ ]]> </help> <citations> + <citation type="doi">10.1021/acs.jproteome.8b00404</citation> <citation type="doi">10.1158/0008-5472.CAN-17-0338</citation> <citation type="doi">10.1186/s13059-017-1377-x</citation> </citations>
--- a/test-data/error.tsv Thu Aug 16 12:28:29 2018 -0400 +++ b/test-data/error.tsv Fri Sep 07 16:53:05 2018 -0400 @@ -1,9 +1,9 @@ #Input Errors Report -#2018-08-13 15:36:32.358464 +#2018-09-07 16:42:38.414856 #CRAVAT version: hybrid #Analysis done at http://www.cravat.us. -#Job Id: rsajulga_20180813_113614 -#Input file: Freebayes_two_variants.vcf +#Job Id: rsajulga_20180907_124216 +#Input file: Freebayes_one_variant.vcf #This report shows errors that occurred in the input. #Input coordinate: hg38 genomic. #CHASM classifier: Breast
--- a/test-data/gene.tsv Thu Aug 16 12:28:29 2018 -0400 +++ b/test-data/gene.tsv Fri Sep 07 16:53:05 2018 -0400 @@ -1,9 +1,9 @@ #Gene Level Annotation Report -#2018-08-13 15:36:32.359533 +#2018-09-07 16:42:38.415811 #CRAVAT version: hybrid #Analysis done at http://www.cravat.us. -#Job Id: rsajulga_20180813_113614 -#Input file: Freebayes_two_variants.vcf +#Job Id: rsajulga_20180907_124216 +#Input file: Freebayes_one_variant.vcf #This report shows analysis results at gene level. #The composite p-value (Stouffer's combined p-value) and composite FDR of a gene show how probable it is to get the same p-value distribution for the gene as that obtained from the input variants by chance. #hg38 genomic. @@ -11,5 +11,4 @@ #For more information on CRAVAT, visit http://www.cravat.us. HUGO symbol Number of variants Sequence ontology CGC driver class CGC inheritance CGC tumor types somatic CGC tumor types germline ClinVar disease identifier ClinVar XRef Occurrences in COSMIC COSMIC gene count (tissue) Number of samples with gene mutated CHASM gene score CHASM gene p-value CHASM gene FDR VEST gene score (non-silent) VEST gene p-value VEST gene FDR Protein 3D gene Has a mutation in a TCGA Mutation Cluster NCI pathway hits NCI pathway IDs NCI pathway names TARGET CGL driver class -CRABP2 1 MS 37 upper_aerodigestive_tract(3);large_intestine(9);stomach(4);soft_tissue(3);endometrium(4);lung(3);liver(2);skin(4);NS(1);prostate(1);bone(1);kidney(1);breast(1) 1 0.358 0.4176 1 ../MuPIT_Interactive?gm=chr1:156701052 0 -UPF1 1 MS 267 large_intestine(57);endometrium(18);lung(13);skin(45);meninges(1);kidney(9);thyroid(3);cervix(4);central_nervous_system(7);oesophagus(5);NS(4);upper_aerodigestive_tract(10);biliary_tract(2);stomach(15);soft_tissue(6);urinary_tract(12);breast(11);prostate(7);pancreas(7);haematopoietic_and_lymphoid_tissue(10);ovary(4);bone(2);liver(15) 1 0.63 0.0394 0.1 ../MuPIT_Interactive?gm=chr19:18856059 0 +UPF1 1 MS 267 
large_intestine(57);endometrium(18);lung(13);skin(45);meninges(1);kidney(9);thyroid(3);cervix(4);central_nervous_system(7);oesophagus(5);NS(4);upper_aerodigestive_tract(10);biliary_tract(2);stomach(15);soft_tissue(6);urinary_tract(12);breast(11);prostate(7);pancreas(7);haematopoietic_and_lymphoid_tissue(10);ovary(4);bone(2);liver(15) 1 0.63 0.0394 1 ../MuPIT_Interactive?gm=chr19:18856059 0
--- a/test-data/noncoding.tsv Thu Aug 16 12:28:29 2018 -0400 +++ b/test-data/noncoding.tsv Fri Sep 07 16:53:05 2018 -0400 @@ -1,9 +1,9 @@ #Non-coding Variant Report -#2018-08-13 15:36:32.354693 +#2018-09-07 16:42:38.411675 #CRAVAT version: hybrid #Analysis done at http://www.cravat.us. -#Job Id: rsajulga_20180813_113614 -#Input file: Freebayes_two_variants.vcf +#Job Id: rsajulga_20180907_124216 +#Input file: Freebayes_one_variant.vcf #This report shows analysis results at variant level. #hg38 genomic. #Breast
--- a/test-data/variant.tsv Thu Aug 16 12:28:29 2018 -0400 +++ b/test-data/variant.tsv Fri Sep 07 16:53:05 2018 -0400 @@ -1,13 +1,12 @@ #Variant Report -#2018-08-13 15:36:32.354483 +#2018-09-07 16:42:38.411479 #CRAVAT version: hybrid #Analysis done at http://www.cravat.us. -#Job Id: rsajulga_20180813_113614 -#Input file: Freebayes_two_variants.vcf +#Job Id: rsajulga_20180907_124216 +#Input file: Freebayes_one_variant.vcf #This report shows analysis results at variant level. #hg38 genomic. #Breast #For more information on CRAVAT, visit http://www.cravat.us. Input line ID Chromosome Position Strand Reference base(s) Alternate base(s) Sample ID HUGO symbol Sequence ontology Protein sequence change Reference peptide Variant peptide CHASM p-value CHASM FDR ClinVar COSMIC ID COSMIC variant count (tissue) Number of samples with variant dbSNP ESP6500 AF (average) gnomAD AF Total gnomAD AF African gnomAD AF American gnomAD AF Ashkenazi Jewish gnomAD AF East Asian gnomAD AF Finnish gnomAD AF Non-Finnish European gnomAD AF Other gnomAD AF South Asian GWAS NHLBI Key (GRASP) GWAS PMID (GRASP) GWAS Phenotype (GRASP) Protein 3D variant In TCGA Mutation Cluster ncRNA Class ncRNA Name Pseudogene Pseudogene Transcript Repeat Class Repeat Family Repeat Name TARGET 1000 Genomes AF UTR/Intron UTR/Intron Gene UTR/Intron All Transcript Phred VCF filters Zygosity Alternate reads Total reads Variant allele frequency CGL driver class S.O. transcript S.O. transcript strand S.O. 
all transcripts CGC driver class CGC inheritance CGC tumor types somatic CGC tumor types germline CHASM transcript CHASM score All transcripts CHASM results ClinVar disease identifier ClinVar XRef COSMIC transcript COSMIC protein change COSMIC variant count ESP6500 AF (European American) ESP6500 AF (African American) HGVS Genomic HGVS Protein HGVS Protein All NCI pathway hits NCI pathway IDs NCI pathway names -1 VAR516_unknown chr1 156701052 + C T unknown CRABP2 MS G24E 0.4176 1 0.0 4.07800406169e-06 2.98044825942e-05 ../MuPIT_Interactive?gm=chr1:156701052 0 122.853 . het 8 20 0.4 ENST00000368221.1 - *ENST00000368221.1:G24E(MS),ENST00000621784.4:G24E(MS),ENST00000368222.7:G24E(MS) ENST00000368221.1 0.358 *ENST00000368221.1:G24E(0.358:0.4176),ENST00000368222.7:G24E(0.358:0.4176),ENST00000621784.4:G24E(0.358:0.4176) 0 0 NC_000001.10:g.156701052C>T ENST00000368221.1:p.Gly24Glu *ENST00000368221.1:p.Gly24Glu,ENST00000621784.4:p.Gly24Glu,ENST00000368222.7:p.Gly24Glu 0 -2 VAR517_unknown chr19 18856059 + C T unknown UPF1 MS A571V EAIDSPVSFLALHNQIR EAIDSPVSFLVLHNQIR 0.0394 COSM3100527 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr19:18856059 0 10269.5 . het 592 2379 0.248844052123 ENST00000599848.5 + ENST00000262803.9:A560V(MS),*ENST00000599848.5:A571V(MS) ENST00000262803.9 0.63 *ENST00000599848.5:A571V(0.61:0.0530),ENST00000262803.9:A560V(0.63:0.0394) ENST00000262803 p.A560V (large_intestine 1) 1 0 0 NC_000019.10:g.18856059C>T ENST00000599848.5:p.Ala571Val ENST00000262803.9:p.Ala560Val,*ENST00000599848.5:p.Ala571Val 0 +1 VAR516_unknown chr19 18856059 + C T unknown UPF1 MS A571V EAIDSPVSFLALHNQIR EAIDSPVSFLVLHNQIR 0.0394 COSM3100527 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr19:18856059 0 10269.5 . 
het 592 2379 0.248844052123 ENST00000599848.5 + ENST00000262803.9:A560V(MS),*ENST00000599848.5:A571V(MS) ENST00000262803.9 0.63 *ENST00000599848.5:A571V(0.61:0.0530),ENST00000262803.9:A560V(0.63:0.0394) ENST00000262803 p.A560V (large_intestine 1) 1 0 0 NC_000019.10:g.18856059C>T ENST00000599848.5:p.Ala571Val ENST00000262803.9:p.Ala560Val,*ENST00000599848.5:p.Ala571Val 0