changeset 3:a018c44dc18b draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatp_score_and_annotate commit d80e60ce74aabe64e131d560085af099d52b81cf-dirty
author galaxyp
date Fri, 07 Sep 2018 16:53:05 -0400
parents f3027b8f28bd
children
files cravatp_submit.py cravatp_submit.xml test-data/error.tsv test-data/gene.tsv test-data/noncoding.tsv test-data/variant.tsv
diffstat 6 files changed, 79 insertions(+), 62 deletions(-) [+]
line wrap: on
line diff
--- a/cravatp_submit.py	Thu Aug 16 12:28:29 2018 -0400
+++ b/cravatp_submit.py	Fri Sep 07 16:53:05 2018 -0400
@@ -24,15 +24,12 @@
 # initializes blank parameters
 chasm_classifier = ''
 probed_filename = None
-intersected_only = False
+all_intersect = False
 vcf_output = None
 analysis_type = None
 
 # # Testing Command
-# python cravatp_submit.py test-data/Freebayes_two-variants.vcf GRCh38
-# test-data/variant.tsv test-data/gene.tsv test-data/noncoding.tsv
-# test-data/error.tsv CHASM -—classifier Breast -—proBED
-# test-data/MCF7_proBed.bed
+# python cravatp_submit.py test-data/Freebayes_two-variants.vcf GRCh38 test-data/variant.tsv test-data/gene.tsv test-data/noncoding.tsv test-data/error.tsv CHASM --classifier Breast --proBED test-data/MCF7_proBed.bed
 parser = argparse.ArgumentParser()
 parser.add_argument('cravatInput',help='The filename of the input '
                                        'CRAVAT-formatted tabular file '
@@ -56,11 +53,8 @@
 parser.add_argument('--proBED', help='The filename of the proBED file '
                                      'containing peptides with genomic '
                                      'coordinates')
-parser.add_argument('--intersectOnly', help='Specifies whether to '
-                                            'analyze only variants '
-                                            'intersected between the '
-                                            'CRAVAT input and proBED '
-                                            'file')
+parser.add_argument('--allIntersect', help='Specifies whether to '
+                                            'analyze all variants')
 parser.add_argument('--vcfOutput', help='The output filename of the '
                                         'intersected VCF file')
 
@@ -78,8 +72,8 @@
     chasm_classifier = args.classifier
 if args.proBED:
     probed_filename = args.proBED
-if args.intersectOnly:
-    intersected_only = args.intersectOnly    
+if args.allIntersect:
+    all_intersect = args.allIntersect    
 if args.vcfOutput:
     vcf_output = args.vcfOutput
 
@@ -118,7 +112,7 @@
 # proteogenomic input (proBED) file if the user specifies that they want
 # to only include intersected variants or if they want to receive the
 # intersected VCF as well.
-if probed_filename and (vcf_output or intersected_only == 'true'):
+if probed_filename and (vcf_output or all_intersect == 'false'):
     proBED = loadProBED()
     if not vcf_output:
         vcf_output = 'intersected_input.vcf'
@@ -143,7 +137,7 @@
                                    genpos <= pepposB):
                         tsvout.writerow(row)
                         break
-if intersected_only == 'true':
+if all_intersect == 'false':
     input_filename = vcf_output
 
 # sets up the parameters for submission to the CRAVAT API
--- a/cravatp_submit.xml	Thu Aug 16 12:28:29 2018 -0400
+++ b/cravatp_submit.xml	Fri Sep 07 16:53:05 2018 -0400
@@ -1,12 +1,12 @@
-<tool id="cravatp_submit" name="CRAVAT-P Submit, Intersect, Check, and Retrieve" version="1.0.0">
+<tool id="cravatp_submit" name="CRAVAT-P Submit, Intersect, Check, and Retrieve" version="1.1.0">
     <description>| Submits, intersects, checks for, and retrieves data for cancer annotation.</description>
   <command detect_errors="aggressive">
   <![CDATA[
 #if $proteo.proteoInput == 'yes':
     #if '$analysis.type' == 'CHASM' or '$analysis.type' == 'CHASM+VEST':
-        python '$__tool_directory__/cravatp_submit.py' '$input' '$GRCh' '$variant' '$gene' '$noncoding' '$error' '$analysis.type' --classifier '$analysis.classifier' --proBED '$proBED' --intersectOnly '$proteo.intersectedVariants' --vcfOutput '$vcf_output'
+        python '$__tool_directory__/cravatp_submit.py' '$input' '$GRCh' '$variant' '$gene' '$noncoding' '$error' '$analysis.type' --classifier '$analysis.classifier' --proBED '$proBED' --allIntersect '$proteo.intersectedVariants' --vcfOutput '$vcf_output'
     #else: 
-        python '$__tool_directory__/cravatp_submit.py' '$input' '$GRCh' '$variant' '$gene' '$noncoding' '$error' '$analysis.type' --proBED '$proBED' --intersectOnly '$proteo.intersectedVariants' --vcfOutput '$vcf_output' 
+        python '$__tool_directory__/cravatp_submit.py' '$input' '$GRCh' '$variant' '$gene' '$noncoding' '$error' '$analysis.type' --proBED '$proBED' --allIntersect '$proteo.intersectedVariants' --vcfOutput '$vcf_output' 
     #end if
 #else:
     #if $analysis.type == 'CHASM' or $analysis.type == 'CHASM+VEST':
@@ -20,20 +20,20 @@
   <inputs>
     <param format="vcf" name="input" type="data" label="Source file" help="Accepts transcriptomic or genomic inputs (e.g., tabular, VCF). Additional details can be found below."></param>
     <conditional name="proteo">
-      <param name="proteoInput" type="select" label="Intersect with proteogenomic input?" help="Source file (first input) must be in genomic input to enable intersection with this proteogenomic file.">
+      <param name="proteoInput" type="select" label="Intersect with proteogenomic input?" help="Source file (first input) must be in genomic input to enable intersection with this proteogenomic file. Only variants intersected between the genomic and proteogenomic files are annotated, unless specified otherwise below.">
         <option value="yes">Yes</option>
         <option value="no" selected="true">No</option>
       </param>
       <when value="yes">
         <param format="BED" name="proBED" type="data" label="Peptides with Genomic Coordinates (ProBED Format)"></param>
-        <param name="intersectedVariants" type="boolean" checked="false" label="Submit only intersected variants?" help="Submits the intersected portion of the genomic file to CRAVAT's server. Restricting analysis to only intersected variants takes less time but also provides less-comprehensive results."></param>
+        <param name="intersectedVariants" type="boolean" checked="false" label="Submit all variants?" help="Submits all variants, including non-intersected variants, to the CRAVAT server. This results in a complete genomic analysis at the expense of a longer runtime."></param>
         <param name="output_vcf" type="boolean" checked="false" label="Output intersected genomic file?" help="The intersected genomic file (e.g., VCF) will be included as a result."></param>
       </when>
       <when value="no">
       </when>
     </conditional>
     <conditional name="analysis">
-      <param format="tabular" name="type" type="select" label="Analysis Program" help="VEST and CHASM are machine learning methods for predicting the pathogenicity and functional significance of variants, respectively.">
+      <param format="tabular" name="type" type="select" label="Analysis Program" help="CHASM and VEST are machine learning methods for predicting the functional significance and pathogenicity of variants, respectively.">
           <option value="None">None</option>
           <option value="CHASM">CHASM</option>
           <option value="VEST">VEST</option>
@@ -110,8 +110,8 @@
     </param>
   </inputs>
   <outputs>
-    <collection name="results" type="list" label="CRAVAT Results on ${on_string}">
-      <data format="tabular" label="CRAVAT: Gene Level Annotation Report on ${on_string}" name="gene" />
+    <collection name="results" type="list" label="CRAVAT-P Results on ${on_string}">
+      <data format="tabular" label="CRAVAT: Gene Level Annotation Report on ${on_string}" name="gene" />
       <data format="tabular" label="CRAVAT: Variant Report on ${on_string}" name="variant" />
       <data format="tabular" label="CRAVAT: Non-coding Variant Report on ${on_string}" name="noncoding" />
       <data format="tabular" label="CRAVAT: Errors on ${on_string}" name="error" />
@@ -121,6 +121,28 @@
       </data>
   </outputs>
   <tests>
+    <!-- Proteogenomic test case -->
+    <test>
+      <param name="input" value="Freebayes_one-variant.vcf"/>
+      <param name="GRCh" value="GRCh38"/>
+      <param name="variant" value="variant.tsv"/>
+      <param name="gene" value="gene.tsv"/>
+      <param name="noncoding" value="noncoding.tsv"/>
+      <param name="error" value="error.tsv"/>
+      <param name="type" value="CHASM" />
+      <param name="classifier" value="Breast" />
+      <param name="proteoInput" value="yes" />
+      <param name="proBED" value="MCF7_proBed.bed"/>
+      <output_collection name="results" type="list">
+        <element name="variant">
+          <assert_contents>
+            <has_text text="hg38"/>
+            <has_text text="UPF1" />
+            <not_has_text text="CRABP2"/>
+          </assert_contents>
+        </element>
+      </output_collection>
+    </test>
     <!-- GRCh38/hg38 and no analysis test case -->
     <test>
       <param name="input" value="Freebayes_one-variant.vcf"/>
@@ -202,29 +224,7 @@
         </element>
       </output_collection>
     </test>
-    <!-- Proteogenomic test case -->
-    <test>
-      <param name="input" value="Freebayes_one-variant.vcf"/>
-      <param name="GRCh" value="GRCh38"/>
-      <param name="variant" value="variant.tsv"/>
-      <param name="gene" value="gene.tsv"/>
-      <param name="noncoding" value="noncoding.tsv"/>
-      <param name="error" value="error.tsv"/>
-      <param name="type" value="CHASM" />
-      <param name="classifier" value="Breast" />
-      <param name="proteoInput" value="yes" />
-      <param name="proBED" value="MCF7_proBed.bed"/>
-      <output_collection name="results" type="list">
-        <element name="variant">
-          <assert_contents>
-            <has_text text="#Variant Report" />
-            <has_text text="hg38"/>
-            <has_text text="UPF1" />
-            <has_text text="EAIDSPVSFLVLHNQIR" />
-          </assert_contents>
-        </element>
-      </output_collection>
-    </test>
+    
     <!-- "Output intersected VCF" test case -->
     <test>
       <param name="input" value="Freebayes_one-variant.vcf"/>
@@ -240,6 +240,29 @@
       <param name="output_vcf" value="true"/>
       <output name="vcf_output" file="results/intersected_vcf.vcf"/>
     </test>
+    <!-- "All proteogenomic variants submitted" test case-->
+    <test>
+      <param name="input" value="Freebayes_two-variants.vcf"/>
+      <param name="GRCh" value="GRCh38"/>
+      <param name="variant" value="variant.tsv"/>
+      <param name="gene" value="gene.tsv"/>
+      <param name="noncoding" value="noncoding.tsv"/>
+      <param name="error" value="error.tsv"/>
+      <param name="type" value="CHASM" />
+      <param name="classifier" value="Breast" />
+      <param name="proteoInput" value="yes" />
+      <param name="proBED" value="MCF7_proBed.bed"/>
+      <param name="intersectedVariants" value="true" />
+      <output_collection name="results" type="list">
+        <element name="variant">
+          <assert_contents>
+            <has_text text="hg38"/>
+            <has_text text="UPF1" />
+            <has_text text="CRABP2"/>
+          </assert_contents>
+        </element>
+      </output_collection>
+    </test>
     <!-- "Only intersected proteogenomic variants submitted" test case-->
     <test>
       <param name="input" value="Freebayes_two-variants.vcf"/>
@@ -256,9 +279,10 @@
       <output_collection name="results" type="list">
         <element name="variant">
           <assert_contents>
+            <has_text text="#Variant Report" />
             <has_text text="hg38"/>
             <has_text text="UPF1" />
-            <not_has_text text="CRABP2"/>
+            <has_text text="EAIDSPVSFLVLHNQIR" />
           </assert_contents>
         </element>
       </output_collection>
@@ -290,6 +314,7 @@
 ]]>
   </help>
   <citations>
+    <citation type="doi">10.1021/acs.jproteome.8b00404</citation>
     <citation type="doi">10.1158/0008-5472.CAN-17-0338</citation>
     <citation type="doi">10.1186/s13059-017-1377-x</citation>
   </citations>
--- a/test-data/error.tsv	Thu Aug 16 12:28:29 2018 -0400
+++ b/test-data/error.tsv	Fri Sep 07 16:53:05 2018 -0400
@@ -1,9 +1,9 @@
 #Input Errors Report
-#2018-08-13 15:36:32.358464
+#2018-09-07 16:42:38.414856
 #CRAVAT version: hybrid
 #Analysis done at http://www.cravat.us.
-#Job Id: rsajulga_20180813_113614
-#Input file: Freebayes_two_variants.vcf
+#Job Id: rsajulga_20180907_124216
+#Input file: Freebayes_one_variant.vcf
 #This report shows errors that occurred in the input.
 #Input coordinate: hg38 genomic.
 #CHASM classifier: Breast
--- a/test-data/gene.tsv	Thu Aug 16 12:28:29 2018 -0400
+++ b/test-data/gene.tsv	Fri Sep 07 16:53:05 2018 -0400
@@ -1,9 +1,9 @@
 #Gene Level Annotation Report
-#2018-08-13 15:36:32.359533
+#2018-09-07 16:42:38.415811
 #CRAVAT version: hybrid
 #Analysis done at http://www.cravat.us.
-#Job Id: rsajulga_20180813_113614
-#Input file: Freebayes_two_variants.vcf
+#Job Id: rsajulga_20180907_124216
+#Input file: Freebayes_one_variant.vcf
 #This report shows analysis results at gene level.
 #The composite p-value (Stouffer's combined p-value) and composite FDR of a gene show how probable it is to get the same p-value distribution for the gene as that obtained from the input variants by chance.
 #hg38 genomic.
@@ -11,5 +11,4 @@
 #For more information on CRAVAT, visit http://www.cravat.us.
 
 HUGO symbol	Number of variants	Sequence ontology	CGC driver class	CGC inheritance	CGC tumor types somatic	CGC tumor types germline	ClinVar disease identifier	ClinVar XRef	Occurrences in COSMIC	COSMIC gene count (tissue)	Number of samples with gene mutated	CHASM gene score	CHASM gene p-value	CHASM gene FDR	VEST gene score (non-silent)	VEST gene p-value	VEST gene FDR	Protein 3D gene	Has a mutation in a TCGA Mutation Cluster	NCI pathway hits	NCI pathway IDs	NCI pathway names	TARGET	CGL driver class
-CRABP2	1	MS							37	upper_aerodigestive_tract(3);large_intestine(9);stomach(4);soft_tissue(3);endometrium(4);lung(3);liver(2);skin(4);NS(1);prostate(1);bone(1);kidney(1);breast(1)	1	0.358	0.4176	1				../MuPIT_Interactive?gm=chr1:156701052		0				
-UPF1	1	MS							267	large_intestine(57);endometrium(18);lung(13);skin(45);meninges(1);kidney(9);thyroid(3);cervix(4);central_nervous_system(7);oesophagus(5);NS(4);upper_aerodigestive_tract(10);biliary_tract(2);stomach(15);soft_tissue(6);urinary_tract(12);breast(11);prostate(7);pancreas(7);haematopoietic_and_lymphoid_tissue(10);ovary(4);bone(2);liver(15)	1	0.63	0.0394	0.1				../MuPIT_Interactive?gm=chr19:18856059		0				
+UPF1	1	MS							267	large_intestine(57);endometrium(18);lung(13);skin(45);meninges(1);kidney(9);thyroid(3);cervix(4);central_nervous_system(7);oesophagus(5);NS(4);upper_aerodigestive_tract(10);biliary_tract(2);stomach(15);soft_tissue(6);urinary_tract(12);breast(11);prostate(7);pancreas(7);haematopoietic_and_lymphoid_tissue(10);ovary(4);bone(2);liver(15)	1	0.63	0.0394	1				../MuPIT_Interactive?gm=chr19:18856059		0				
--- a/test-data/noncoding.tsv	Thu Aug 16 12:28:29 2018 -0400
+++ b/test-data/noncoding.tsv	Fri Sep 07 16:53:05 2018 -0400
@@ -1,9 +1,9 @@
 #Non-coding Variant Report
-#2018-08-13 15:36:32.354693
+#2018-09-07 16:42:38.411675
 #CRAVAT version: hybrid
 #Analysis done at http://www.cravat.us.
-#Job Id: rsajulga_20180813_113614
-#Input file: Freebayes_two_variants.vcf
+#Job Id: rsajulga_20180907_124216
+#Input file: Freebayes_one_variant.vcf
 #This report shows analysis results at variant level.
 #hg38 genomic.
 #Breast
--- a/test-data/variant.tsv	Thu Aug 16 12:28:29 2018 -0400
+++ b/test-data/variant.tsv	Fri Sep 07 16:53:05 2018 -0400
@@ -1,13 +1,12 @@
 #Variant Report
-#2018-08-13 15:36:32.354483
+#2018-09-07 16:42:38.411479
 #CRAVAT version: hybrid
 #Analysis done at http://www.cravat.us.
-#Job Id: rsajulga_20180813_113614
-#Input file: Freebayes_two_variants.vcf
+#Job Id: rsajulga_20180907_124216
+#Input file: Freebayes_one_variant.vcf
 #This report shows analysis results at variant level.
 #hg38 genomic.
 #Breast
 #For more information on CRAVAT, visit http://www.cravat.us.
 Input line	ID	Chromosome	Position	Strand	Reference base(s)	Alternate base(s)	Sample ID	HUGO symbol	Sequence ontology	Protein sequence change	Reference peptide	Variant peptide	CHASM p-value	CHASM FDR	ClinVar	COSMIC ID	COSMIC variant count (tissue)	Number of samples with variant	dbSNP	ESP6500 AF (average)	gnomAD AF Total	gnomAD AF African	gnomAD AF American	gnomAD AF Ashkenazi Jewish	gnomAD AF East Asian	gnomAD AF Finnish	gnomAD AF Non-Finnish European	gnomAD AF Other	gnomAD AF South Asian	GWAS NHLBI Key (GRASP)	GWAS PMID (GRASP)	GWAS Phenotype (GRASP)	Protein 3D variant	In TCGA Mutation Cluster	ncRNA Class	ncRNA Name	Pseudogene	Pseudogene Transcript	Repeat Class	Repeat Family	Repeat Name	TARGET	1000 Genomes AF	UTR/Intron	UTR/Intron Gene	UTR/Intron All Transcript	Phred	VCF filters	Zygosity	Alternate reads	Total reads	Variant allele frequency	CGL driver class	S.O. transcript	S.O. transcript strand	S.O. all transcripts	CGC driver class	CGC inheritance	CGC tumor types somatic	CGC tumor types germline	CHASM transcript	CHASM score	All transcripts CHASM results	ClinVar disease identifier	ClinVar XRef	COSMIC transcript	COSMIC protein change	COSMIC variant count	ESP6500 AF (European American)	ESP6500 AF (African American)	HGVS Genomic	HGVS Protein	HGVS Protein All	NCI pathway hits	NCI pathway IDs	NCI pathway names
-1	VAR516_unknown	chr1	156701052	+	C	T	unknown	CRABP2	MS	G24E			0.4176					1		0.0	4.07800406169e-06		2.98044825942e-05										../MuPIT_Interactive?gm=chr1:156701052										0				122.853	.	het	8	20	0.4		ENST00000368221.1	-	*ENST00000368221.1:G24E(MS),ENST00000621784.4:G24E(MS),ENST00000368222.7:G24E(MS)					ENST00000368221.1	0.358	*ENST00000368221.1:G24E(0.358:0.4176),ENST00000368222.7:G24E(0.358:0.4176),ENST00000621784.4:G24E(0.358:0.4176)						0	0	NC_000001.10:g.156701052C>T	ENST00000368221.1:p.Gly24Glu	*ENST00000368221.1:p.Gly24Glu,ENST00000621784.4:p.Gly24Glu,ENST00000368222.7:p.Gly24Glu	0		
-2	VAR517_unknown	chr19	18856059	+	C	T	unknown	UPF1	MS	A571V	EAIDSPVSFLALHNQIR	EAIDSPVSFLVLHNQIR	0.0394			COSM3100527	large_intestine(1)	1		0.0													../MuPIT_Interactive?gm=chr19:18856059										0				10269.5	.	het	592	2379	0.248844052123		ENST00000599848.5	+	ENST00000262803.9:A560V(MS),*ENST00000599848.5:A571V(MS)					ENST00000262803.9	0.63	*ENST00000599848.5:A571V(0.61:0.0530),ENST00000262803.9:A560V(0.63:0.0394)			ENST00000262803	p.A560V (large_intestine 1)	1	0	0	NC_000019.10:g.18856059C>T	ENST00000599848.5:p.Ala571Val	ENST00000262803.9:p.Ala560Val,*ENST00000599848.5:p.Ala571Val	0		
+1	VAR516_unknown	chr19	18856059	+	C	T	unknown	UPF1	MS	A571V	EAIDSPVSFLALHNQIR	EAIDSPVSFLVLHNQIR	0.0394			COSM3100527	large_intestine(1)	1		0.0													../MuPIT_Interactive?gm=chr19:18856059										0				10269.5	.	het	592	2379	0.248844052123		ENST00000599848.5	+	ENST00000262803.9:A560V(MS),*ENST00000599848.5:A571V(MS)					ENST00000262803.9	0.63	*ENST00000599848.5:A571V(0.61:0.0530),ENST00000262803.9:A560V(0.63:0.0394)			ENST00000262803	p.A560V (large_intestine 1)	1	0	0	NC_000019.10:g.18856059C>T	ENST00000599848.5:p.Ala571Val	ENST00000262803.9:p.Ala560Val,*ENST00000599848.5:p.Ala571Val	0