Mercurial > repos > nml > gnali
diff gnali.xml @ 3:02d368ec14cf draft
"planemo upload for repository https://github.com/phac-nml/gnali/ commit 48745557cc8e603b61ba1a78308f72562f559e59"
author | nml |
---|---|
date | Wed, 13 Jan 2021 18:35:15 +0000 |
parents | 49012f2b4c19 |
children | b6e197aac430 |
line wrap: on
line diff
--- a/gnali.xml Mon Apr 20 17:04:56 2020 -0400 +++ b/gnali.xml Wed Jan 13 18:35:15 2021 +0000 @@ -1,109 +1,122 @@ -<tool id="gnali" name="gNALI" version="0.1.1" python_template_version="3.6"> +<tool id="gnali" name="gNALI" version="1.0.1" python_template_version="3.7"> <description>Get nonessential, LoF variants</description> - <requirements> - <requirement type="package" version="0.1.1">gnali</requirement> - </requirements> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> <command detect_errors="exit_code"><![CDATA[ - gnali -i '$test_genes' -o output + gnali -i '$test_genes' -o output -d '$database_info.database' + #if $database_info.predefined_filters != "None": + --predefined_filters '$database_info.predefined_filters' + #end if + #if len($additional_filters) > 0: + --additional_filters + #for $filt in $additional_filters + '$filt.filter' + #end for + #end if + #if $vcf_output: + '$vcf_output' + #end if + #if $pop_freqs: + '$pop_freqs' + #end if ]]></command> <inputs> <param type="data" name="test_genes" label="Test genes" format="txt" help="Specify a list of genes as HGNC symbols, separated by newline characters" /> - <param type="select" name="database" label="Database" format="txt" help="Database to query" > - <option value="gnomad2.1.1" selected="true">gnomAD2.1.1 (GRCh37/hg19)</option> - </param> + <param name="vcf_output" type="boolean" truevalue="--vcf" falsevalue="" optional="false" checked="false" label="VCF output" help="Generate vcf file for filtered variants" /> + <param name="pop_freqs" type="boolean" truevalue="--pop_freqs" falsevalue="" optional="false" checked="false" label="Population frequencies" help="Generate population frequency data for variants that passed filtering" /> + <conditional name="database_info"> + <param type="select" name="database" label="Database" format="txt" help="Database to query" > + <option value="gnomadv2.1.1" selected="true">gnomADv2.1.1 (GRCh37/hg19)</option> + <option 
value="gnomadv3">gnomADv3 (GRCh38/hg38)</option> + </param> + <when value="gnomadv2.1.1"> + <param name="predefined_filters" type="select" display="checkboxes" multiple="True" label="Predefined filters" help="Filter variants by selected filters"> + <option value="homozygous-controls">homozygous controls (controls_nhomalt>0)</option> + <option value="heterozygous-controls">heterozygous controls (controls_nhomalt=0)</option> + <option value="nhomalt>0">homozygous (nhomalt>0)</option> + </param> + </when> + <when value="gnomadv3"> + <param name="predefined_filters" type="select" display="checkboxes" multiple="True" label="Predefined filters" help="Filter variants by selected filters"> + <option value="homozygous">homozygous (nhomalt>0)</option> + <option value="heterozygous">heterozygous (nhomalt=0)</option> + </param> + </when> + </conditional> + <repeat name="additional_filters" title="Additional filters" min="0" default="0" help="Additional filters (as expressions, ex. AC>10) to apply" > + <param name="filter" type="text" optional="False" label="Filter"> + <sanitizer invalid_char=""> + <valid initial="string.ascii_letters,string.digits"> + <add value=">" /> + <add value="<" /> + <add value="=" /> + </valid> + </sanitizer> + </param> + </repeat> </inputs> <outputs> - <data name="basic_output" label="gNALI basic output" format="txt" from_work_dir="output/Nonessential_Host_Genes_\(Basic\).txt" /> - <data name="detailed_output" label="gNALI detailed output" format="txt" from_work_dir="output/Nonessential_Host_Genes_\(Detailed\).txt" /> + <data name="basic_output" label="gNALI basic output on ${test_genes.element_identifier}" format="txt" from_work_dir="output/Nonessential_Host_Genes_\(Basic\).txt" /> + <data name="detailed_output" label="gNALI detailed output on ${test_genes.element_identifier}" format="txt" from_work_dir="output/Nonessential_Host_Genes_\(Detailed\).txt" /> + <data name="variants_vcf_output" label="gNALI variants vcf on 
${test_genes.element_identifier}" format="vcf" from_work_dir="output/Nonessential_Gene_Variants.vcf" > + <filter>vcf_output</filter> + </data> </outputs> <tests> <test> <param name="test_genes" value="test_genes.txt"/> - <output name="basic_output"> - <assert_contents> - <has_text text="HGNC_Symbol" /> - <has_text text="CCR5" /> - </assert_contents> - </output> - <output name="detailed_output"> - <assert_contents> - <has_text_matching expression="Chromosome\tPosition_Start\tRSID\tReference_Allele\tAlternate_Allele\tScore\tQuality\tLoF_Variant\tLoF_Annotation\tHGNC_Symbol\tEnsembl Code" /> - <has_text_matching expression="3\t46414935\trs938517991\tAT\tA\t9974.16\tPASS\t-\tframeshift_variant\tCCR5\tENSG00000160791" /> - <has_text_matching expression="3\t46414943\trs775750898\tTACAGTCAGTATCAATTCTGGAAGAATTTCCAG\tT\t74264261.52\tPASS\t-\tframeshift_variant\tCCR5\tENSG00000160791" /> - <has_text_matching expression="3\t46415066\trs146972949\tC\tT\t120238.89\tPASS\tT\tstop_gained\tCCR5\tENSG00000160791" /> - <has_text_matching expression="3\t46414943\trs775750898\tTACAGTCAGTATCAATTCTGGAAGAATTTCCAG\tT\t1947603.90\tPASS\t-\tframeshift_variant\tCCR5\tENSG00000160791" /> - </assert_contents> + <param name="predefined_filters" value="homozygous-controls"/> + <param name="pop_freqs" value="--pop_freqs"/> + <param name="vcf_output" value="--vcf"/> + <output name="basic_output" + value="results/Nonessential_Host_Genes_Basic.txt" + ftype="txt" + compare="diff"> </output> - </test> - <test> - <param name="test_genes" value="patch.txt"/> - <output name="basic_output"> - <assert_contents> - <has_text text="HGNC_Symbol" /> - <has_text text="CCR5" /> - <has_text text="RPEL1" /> - <has_text text="OTOGL" /> - <has_text text="PKD1L2" /> - <has_text text="COL6A5" /> - <has_text text="DCP1A" /> - <has_text text="KRT10" /> - </assert_contents> - </output> - <output name="detailed_output"> - <assert_contents> - <has_text_matching 
expression="Chromosome\tPosition_Start\tRSID\tReference_Allele\tAlternate_Allele\tScore\tQuality\tLoF_Variant\tLoF_Annotation\tHGNC_Symbol\tEnsembl Code" /> - <has_text_matching expression="10\t105005931\trs61746130\tC\tT\t4480914.72\tPASS\tT\tstop_gained\tRPEL1\tENSG00000235376" /> - <has_text_matching expression="12\t80770908\trs1222716200\tC\tT\t4175.99\tPASS\tT\tstop_gained\tOTOGL\tENSG00000165899" /> - <has_text_matching expression="16\t81242148\trs752607955\tGTT\tG\t218022105.25\tPASS\t-\tframeshift_variant\tPKD1L2\tENSG00000166473" /> - <has_text_matching expression="16\t81242198\trs7499011\tG\tA\t124146106.95\tPASS\tA\tstop_gained\tPKD1L2\tENSG00000166473" /> - <has_text_matching expression="3\t130114290\trs115380050\tC\tT\t105366.23\tPASS\tT\tstop_gained\tCOL6A5\tENSG00000172752" /> - <has_text_matching expression="3\t130139996\trs139339125\tG\tT\t576280.08\tPASS\tT\tsplice_acceptor_variant\tCOL6A5\tENSG00000172752" /> - <has_text_matching expression="3\t130159309\t.\tAAT\tA\t3284.41\tPASS\t-\tframeshift_variant\tCOL6A5\tENSG00000172752" /> - <has_text_matching expression="3\t130159330\trs2201717\tC\tT\t8770317.31\tPASS\tT\tstop_gained\tCOL6A5\tENSG00000172752" /> - <has_text_matching expression="3\t130187662\trs115375867\tG\tT\t1785133.61\tPASS\tT\tstop_gained\tCOL6A5\tENSG00000172752" /> - <has_text_matching expression="3\t130190720\trs11355796\tAT\tA\t157372019.43\tPASS\t-\tframeshift_variant\tCOL6A5\tENSG00000172752" /> - <has_text_matching expression="3\t53324819\trs782498227\tATGGCAC\tA\t304085671.95\tPASS\t-\tsplice_donor_variant&intron_variant\tDCP1A\tENSG00000162290" /> - <has_text_matching expression="3\t46414935\trs938517991\tAT\tA\t9974.16\tPASS\t-\tframeshift_variant\tCCR5\tENSG00000160791" /> - <has_text_matching expression="3\t46414943\trs775750898\tTACAGTCAGTATCAATTCTGGAAGAATTTCCAG\tT\t74264261.52\tPASS\t-\tframeshift_variant\tCCR5\tENSG00000160791" /> - <has_text_matching 
expression="3\t46415066\trs146972949\tC\tT\t120238.89\tPASS\tT\tstop_gained\tCCR5\tENSG00000160791" /> - <has_text_matching expression="16\t81242148\trs752607955\tGTT\tG\t12370921.18\tPASS\t-\tframeshift_variant\tPKD1L2\tENSG00000166473" /> - <has_text_matching expression="16\t81242198\trs7499011\tG\tA\t7423817.85\tPASS\tA\tstop_gained\tPKD1L2\tENSG00000166473" /> - <has_text_matching expression="17\t38975327\trs764791942\tT\tTAGCCGCCGCC\t282793.87\tPASS\tAGCCGCCGCC\tframeshift_variant\tKRT10\tENSG00000186395" /> - <has_text_matching expression="17\t38975329\trs762667965\tG\tGAGCTT\t238711.24\tPASS\tAGCTT\tframeshift_variant\tKRT10\tENSG00000186395" /> - <has_text_matching expression="3\t130159330\trs2201717\tC\tT\t165579.83\tPASS\tT\tstop_gained\tCOL6A5\tENSG00000172752" /> - <has_text_matching expression="3\t130190720\trs11355796\tAT\tA\t14066881.99\tPASS\t-\tframeshift_variant\tCOL6A5\tENSG00000172752" /> - <has_text_matching expression="3\t53324819\trs782498227\tATGGCAC\tA\t17202744.04\tPASS\t-\tsplice_donor_variant&intron_variant\tDCP1A\tENSG00000162290" /> - <has_text_matching expression="3\t46414943\trs775750898\tTACAGTCAGTATCAATTCTGGAAGAATTTCCAG\tT\t1947603.90\tPASS\t-\tframeshift_variant\tCCR5\tENSG00000160791" /> - </assert_contents> - </output> + <output name="detailed_output" + value="results/Nonessential_Host_Genes_Detailed.txt" + ftype="txt" + compare="diff"> + </output> + <output name="variants_vcf_output" + value="results/Nonessential_Gene_Variants.vcf" + ftype="vcf" + compare="diff"> + </output> </test> </tests> <help><![CDATA[ -gNALI - Gene Nonessentiality and Loss-of-function Identifier -============================================================ +Introduction +------------------ -gNALI is a tool to find (high confidence) potential loss-of-function variants of genes. +gNALI (gene nonessentiality and loss-of-function identifier) is a tool to find (high confidence) +potential loss of function variants of genes. 
+NOTE: loss-of-function is influenced by the genome build. Not all variants available in gnomADv2.1.1 are +available in gnomADv3 and vice versa. -Authors -------- +Usage +----------- -gNALI was developed by Xia Liu. +Your input file must be of format .csv, .txt, or .tsv and should contain a list of genes +(as HGNC symbols) to test, separated by newline characters. +It should not contain any blank lines until the end of the list. -Usage ------ +**Population Frequencies** -Accepted input formats: csv, txt, tsv +When using the population frequencies feature: + +Per population group: -Your input file should contain a list of genes (as HGNC symbols) to test, separated by newline characters. -It should not contain any blank lines until the end of the list. +* AC denotes allele count -There will be two output files: +* AN denotes allele number - 1. A basic output file, containing genes (as HGNC symbols) with nonessential, loss-of-function variants. - 2. A detailed output file, with more information on the variants. +* AF denotes allele frequency ]]></help> <citations>