diff gnali.xml @ 3:02d368ec14cf draft

"planemo upload for repository https://github.com/phac-nml/gnali/ commit 48745557cc8e603b61ba1a78308f72562f559e59"
author nml
date Wed, 13 Jan 2021 18:35:15 +0000
parents 49012f2b4c19
children b6e197aac430
line wrap: on
line diff
--- a/gnali.xml	Mon Apr 20 17:04:56 2020 -0400
+++ b/gnali.xml	Wed Jan 13 18:35:15 2021 +0000
@@ -1,109 +1,122 @@
-<tool id="gnali" name="gNALI" version="0.1.1" python_template_version="3.6">
+<tool id="gnali" name="gNALI" version="1.0.1" python_template_version="3.7">
     <description>Get nonessential, LoF variants</description>
-    <requirements>
-        <requirement type="package" version="0.1.1">gnali</requirement>
-    </requirements>
+    <macros>
+        <import>macros.xml</import>
+    </macros>  
+    <expand macro="requirements" />
     <command detect_errors="exit_code"><![CDATA[
-        gnali -i '$test_genes' -o output
+        gnali -i '$test_genes' -o output -d '$database_info.database'
+        #if $database_info.predefined_filters != "None":
+            --predefined_filters '$database_info.predefined_filters'
+        #end if
+        #if len($additional_filters) > 0:
+            --additional_filters 
+            #for $filt in $additional_filters
+                '$filt.filter' 
+            #end for
+        #end if
+        #if $vcf_output:
+            '$vcf_output'
+        #end if
+        #if $pop_freqs:
+            '$pop_freqs'
+        #end if
     ]]></command>
     <inputs>
         <param type="data" name="test_genes" label="Test genes" format="txt" help="Specify a list of genes as HGNC symbols, separated by newline characters" />
-        <param type="select" name="database" label="Database" format="txt" help="Database to query" >
-            <option value="gnomad2.1.1" selected="true">gnomAD2.1.1 (GRCh37/hg19)</option>
-        </param>
+        <param name="vcf_output" type="boolean" truevalue="--vcf" falsevalue="" optional="false" checked="false" label="VCF output" help="Generate vcf file for filtered variants" />
+        <param name="pop_freqs" type="boolean" truevalue="--pop_freqs" falsevalue="" optional="false" checked="false" label="Population frequencies" help="Generate population frequency data for variants that passed filtering" />
+        <conditional name="database_info">
+            <param type="select" name="database" label="Database" format="txt" help="Database to query" >
+                <option value="gnomadv2.1.1" selected="true">gnomADv2.1.1 (GRCh37/hg19)</option>
+                <option value="gnomadv3">gnomADv3 (GRCh38/hg38)</option>
+            </param>
+            <when value="gnomadv2.1.1">
+                <param name="predefined_filters" type="select" display="checkboxes" multiple="True" label="Predefined filters" help="Filter variants by selected filters">
+                    <option value="homozygous-controls">homozygous controls (controls_nhomalt>0)</option>
+                    <option value="heterozygous-controls">heterozygous controls (controls_nhomalt=0)</option>
+                    <option value="nhomalt>0">homozygous (nhomalt>0)</option>
+                </param>
+            </when>
+            <when value="gnomadv3">
+                <param name="predefined_filters" type="select" display="checkboxes" multiple="True" label="Predefined filters" help="Filter variants by selected filters">
+                    <option value="homozygous">homozygous (nhomalt>0)</option>
+                    <option value="heterozygous">heterozygous (nhomalt=0)</option>
+                </param>
+            </when>
+        </conditional>
+        <repeat name="additional_filters" title="Additional filters" min="0" default="0" help="Additional filters (as expressions, ex. AC>10) to apply" >
+            <param name="filter" type="text" optional="False" label="Filter">
+                <sanitizer invalid_char="">
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="&gt;" />
+                        <add value="&lt;" />
+                        <add value="=" />
+                    </valid>
+                </sanitizer>
+            </param>
+        </repeat>
     </inputs>
     <outputs>
-        <data name="basic_output" label="gNALI basic output" format="txt" from_work_dir="output/Nonessential_Host_Genes_\(Basic\).txt" />
-        <data name="detailed_output" label="gNALI detailed output" format="txt" from_work_dir="output/Nonessential_Host_Genes_\(Detailed\).txt" />
+        <data name="basic_output" label="gNALI basic output on ${test_genes.element_identifier}" format="txt" from_work_dir="output/Nonessential_Host_Genes_\(Basic\).txt" />
+        <data name="detailed_output" label="gNALI detailed output on ${test_genes.element_identifier}" format="txt" from_work_dir="output/Nonessential_Host_Genes_\(Detailed\).txt" />
+        <data name="variants_vcf_output" label="gNALI variants vcf on ${test_genes.element_identifier}" format="vcf" from_work_dir="output/Nonessential_Gene_Variants.vcf" >
+            <filter>vcf_output</filter>
+        </data>
     </outputs>
     <tests>
         <test>
             <param name="test_genes" value="test_genes.txt"/>
-            <output name="basic_output">
-                <assert_contents>
-                    <has_text text="HGNC_Symbol" />
-                    <has_text text="CCR5" />
-                </assert_contents>
-            </output>
-            <output name="detailed_output">
-                <assert_contents>
-                    <has_text_matching expression="Chromosome\tPosition_Start\tRSID\tReference_Allele\tAlternate_Allele\tScore\tQuality\tLoF_Variant\tLoF_Annotation\tHGNC_Symbol\tEnsembl Code" />
-                    <has_text_matching expression="3\t46414935\trs938517991\tAT\tA\t9974.16\tPASS\t-\tframeshift_variant\tCCR5\tENSG00000160791" />
-                    <has_text_matching expression="3\t46414943\trs775750898\tTACAGTCAGTATCAATTCTGGAAGAATTTCCAG\tT\t74264261.52\tPASS\t-\tframeshift_variant\tCCR5\tENSG00000160791" />
-                    <has_text_matching expression="3\t46415066\trs146972949\tC\tT\t120238.89\tPASS\tT\tstop_gained\tCCR5\tENSG00000160791" />
-                    <has_text_matching expression="3\t46414943\trs775750898\tTACAGTCAGTATCAATTCTGGAAGAATTTCCAG\tT\t1947603.90\tPASS\t-\tframeshift_variant\tCCR5\tENSG00000160791" />
-                </assert_contents>
+            <param name="predefined_filters" value="homozygous-controls"/>
+            <param name="pop_freqs" value="--pop_freqs"/>
+            <param name="vcf_output" value="--vcf"/>
+            <output name="basic_output"
+                value="results/Nonessential_Host_Genes_Basic.txt"
+                ftype="txt"
+                compare="diff">
             </output>
-        </test>
-        <test>
-            <param name="test_genes" value="patch.txt"/>
-                <output name="basic_output">
-                    <assert_contents>
-                        <has_text text="HGNC_Symbol" />
-                        <has_text text="CCR5" />
-                        <has_text text="RPEL1" />
-                        <has_text text="OTOGL" />
-                        <has_text text="PKD1L2" />
-                        <has_text text="COL6A5" />
-                        <has_text text="DCP1A" />
-                        <has_text text="KRT10" />
-                    </assert_contents>
-                </output>
-                <output name="detailed_output">
-                    <assert_contents>
-                        <has_text_matching expression="Chromosome\tPosition_Start\tRSID\tReference_Allele\tAlternate_Allele\tScore\tQuality\tLoF_Variant\tLoF_Annotation\tHGNC_Symbol\tEnsembl Code" />
-                        <has_text_matching expression="10\t105005931\trs61746130\tC\tT\t4480914.72\tPASS\tT\tstop_gained\tRPEL1\tENSG00000235376" />
-                        <has_text_matching expression="12\t80770908\trs1222716200\tC\tT\t4175.99\tPASS\tT\tstop_gained\tOTOGL\tENSG00000165899" />
-                        <has_text_matching expression="16\t81242148\trs752607955\tGTT\tG\t218022105.25\tPASS\t-\tframeshift_variant\tPKD1L2\tENSG00000166473" />
-                        <has_text_matching expression="16\t81242198\trs7499011\tG\tA\t124146106.95\tPASS\tA\tstop_gained\tPKD1L2\tENSG00000166473" />
-                        <has_text_matching expression="3\t130114290\trs115380050\tC\tT\t105366.23\tPASS\tT\tstop_gained\tCOL6A5\tENSG00000172752" />
-                        <has_text_matching expression="3\t130139996\trs139339125\tG\tT\t576280.08\tPASS\tT\tsplice_acceptor_variant\tCOL6A5\tENSG00000172752" />
-                        <has_text_matching expression="3\t130159309\t.\tAAT\tA\t3284.41\tPASS\t-\tframeshift_variant\tCOL6A5\tENSG00000172752" />
-                        <has_text_matching expression="3\t130159330\trs2201717\tC\tT\t8770317.31\tPASS\tT\tstop_gained\tCOL6A5\tENSG00000172752" />
-                        <has_text_matching expression="3\t130187662\trs115375867\tG\tT\t1785133.61\tPASS\tT\tstop_gained\tCOL6A5\tENSG00000172752" />
-                        <has_text_matching expression="3\t130190720\trs11355796\tAT\tA\t157372019.43\tPASS\t-\tframeshift_variant\tCOL6A5\tENSG00000172752" />
-                        <has_text_matching expression="3\t53324819\trs782498227\tATGGCAC\tA\t304085671.95\tPASS\t-\tsplice_donor_variant&amp;intron_variant\tDCP1A\tENSG00000162290" />
-                        <has_text_matching expression="3\t46414935\trs938517991\tAT\tA\t9974.16\tPASS\t-\tframeshift_variant\tCCR5\tENSG00000160791" />
-                        <has_text_matching expression="3\t46414943\trs775750898\tTACAGTCAGTATCAATTCTGGAAGAATTTCCAG\tT\t74264261.52\tPASS\t-\tframeshift_variant\tCCR5\tENSG00000160791" />
-                        <has_text_matching expression="3\t46415066\trs146972949\tC\tT\t120238.89\tPASS\tT\tstop_gained\tCCR5\tENSG00000160791" />
-                        <has_text_matching expression="16\t81242148\trs752607955\tGTT\tG\t12370921.18\tPASS\t-\tframeshift_variant\tPKD1L2\tENSG00000166473" />
-                        <has_text_matching expression="16\t81242198\trs7499011\tG\tA\t7423817.85\tPASS\tA\tstop_gained\tPKD1L2\tENSG00000166473" />
-                        <has_text_matching expression="17\t38975327\trs764791942\tT\tTAGCCGCCGCC\t282793.87\tPASS\tAGCCGCCGCC\tframeshift_variant\tKRT10\tENSG00000186395" />
-                        <has_text_matching expression="17\t38975329\trs762667965\tG\tGAGCTT\t238711.24\tPASS\tAGCTT\tframeshift_variant\tKRT10\tENSG00000186395" />
-                        <has_text_matching expression="3\t130159330\trs2201717\tC\tT\t165579.83\tPASS\tT\tstop_gained\tCOL6A5\tENSG00000172752" />
-                        <has_text_matching expression="3\t130190720\trs11355796\tAT\tA\t14066881.99\tPASS\t-\tframeshift_variant\tCOL6A5\tENSG00000172752" />
-                        <has_text_matching expression="3\t53324819\trs782498227\tATGGCAC\tA\t17202744.04\tPASS\t-\tsplice_donor_variant&amp;intron_variant\tDCP1A\tENSG00000162290" />
-                        <has_text_matching expression="3\t46414943\trs775750898\tTACAGTCAGTATCAATTCTGGAAGAATTTCCAG\tT\t1947603.90\tPASS\t-\tframeshift_variant\tCCR5\tENSG00000160791" />
-                    </assert_contents>
-                </output>
+            <output name="detailed_output"
+                value="results/Nonessential_Host_Genes_Detailed.txt"
+                ftype="txt"
+                compare="diff">
+            </output>
+            <output name="variants_vcf_output"
+                value="results/Nonessential_Gene_Variants.vcf"
+                ftype="vcf"
+                compare="diff">
+            </output>
         </test>
     </tests>
     <help><![CDATA[
 
-gNALI - Gene Nonessentiality and Loss-of-function Identifier
-============================================================
+Introduction
+------------------
 
-gNALI is a tool to find (high confidence) potential loss-of-function variants of genes.
+gNALI (gene nonessentiality and loss-of-function identifier) is a tool to find (high confidence) 
+potential loss-of-function variants of genes.
 
+NOTE: loss-of-function is influenced by the genome build. Not all variants available in gnomADv2.1.1 are
+available in gnomADv3 and vice versa.
 
-Authors
--------
+Usage
+-----------
 
-gNALI was developed by Xia Liu.
+Your input file must be of format .csv, .txt, or .tsv and should contain a list of genes
+(as HGNC symbols) to test, separated by newline characters.
+It should not contain any blank lines until the end of the list.
 
 
-Usage
------
+**Population Frequencies**
 
-Accepted input formats: csv, txt, tsv
+When using the population frequencies feature:
+
+Per population group:
 
-Your input file should contain a list of genes (as HGNC symbols) to test, separated by newline characters.
-It should not contain any blank lines until the end of the list.
+* AC denotes allele count
 
-There will be two output files:
+* AN denotes allele number
 
-    1. A basic output file, containing genes (as HGNC symbols) with nonessential, loss-of-function variants.
-    2. A detailed output file, with more information on the variants.
+* AF denotes allele frequency
 
     ]]></help>
     <citations>