Mercurial > repos > iuc > snpsift
changeset 3:20c7d583fec1 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpsift/snpsift commit fbc18d9128669e461e76ed13307ee88dd774afa5
author | iuc |
---|---|
date | Mon, 12 Jun 2017 10:25:32 -0400 |
parents | bf8c1526871b |
children | b04635ebfab0 |
files | snpSift_annotate.xml snpSift_caseControl.xml snpSift_extractFields.xml snpSift_filter.xml snpSift_int.xml snpSift_macros.xml snpSift_rmInfo.xml snpSift_vartype.xml snpSift_vcfCheck.xml |
diffstat | 9 files changed, 240 insertions(+), 316 deletions(-) [+] |
line wrap: on
line diff
--- a/snpSift_annotate.xml Mon Dec 05 12:11:18 2016 -0500 +++ b/snpSift_annotate.xml Mon Jun 12 10:25:32 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="snpSift_annotate" name="SnpSift Annotate" version="@WRAPPER_VERSION@.1"> +<tool id="snpSift_annotate" name="SnpSift Annotate" version="@WRAPPER_VERSION@.0"> <description>SNPs from dbSnp</description> <!-- You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory) @@ -10,22 +10,23 @@ <expand macro="stdio" /> <expand macro="version_command" /> <command><![CDATA[ - @CONDA_SNPSIFT_JAR_PATH@ && - java -Xmx6G -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" $annotate_cmd - #if $annotate.id : - -id - #elif str($annotate.info_ids).strip() != '' : - -info "$annotate.info_ids" - #end if - -q "$dbSnp" "$input" > "$output" -]]> - </command> +SnpSift annotate +#if $annotate.id == 'id': + -id +#elif str($annotate.info_ids).strip() != '': + -info "$annotate.info_ids" +#end if +-q '$dbSnp' '$input' > '$output' + ]]></command> <inputs> - <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> - <param format="vcf" name="dbSnp" type="data" label="VCF File with ID field annotated (e.g. dnSNP.vcf)" + <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/> + <param name="dbSnp" type="data" format="vcf" label="VCF File with ID field annotated (e.g. 
dnSNP.vcf)" help="The ID field for a variant in input will be assigned from a matching variant in this file."/> <conditional name="annotate"> - <param name="id" type="boolean" truevalue="id" falsevalue="info" checked="True" label="Only annotate ID field (do not add INFO field)" help=""/> + <param name="id" type="select" label="Fields to annotate"> + <option value="id">Only annotate ID field (no INFO fields will be added)</option> + <option value="info">Add also INFO fields</option> + </param> <when value="id"/> <when value="info"> <param name="info_ids" type="text" value="" label="Limit INFO annotation to these INFO IDs" help="List is a comma separated list of fields. When blank, all INFO fields are included"> @@ -33,22 +34,15 @@ </param> </when> </conditional> - <param name="annotate_cmd" type="boolean" truevalue="annMem" falsevalue="annotate" checked="false" label="Allow unsorted VCF files"> - <help> - This option will load the entire 'database' VCF file into memory (which may not be practical for large 'database' VCF files). - Otherwise, both the database and the input VCF files should be sorted by position (Chromosome sort order can differ between files). - </help> - </param> </inputs> <outputs> - <data format="vcf" name="output" /> + <data name="output" format="vcf" /> </outputs> <tests> <test> <param name="input" ftype="vcf" value="annotate_1.vcf"/> <param name="dbSnp" ftype="vcf" value="db_test_1.vcf"/> - <param name="annotate_cmd" value="False"/> - <param name="id" value="True"/> + <param name="id" value="id"/> <output name="output"> <assert_contents> <has_text text="rs76166080" /> @@ -57,7 +51,6 @@ </test> </tests> <help><![CDATA[ - This is typically used to annotate IDs from dbSnp. 
Annotating only the ID field from dbSnp137.vcf :: @@ -74,8 +67,6 @@ 22 16346045 rs56234788 T C 0.0 FAIL NS=244 22 16350245 rs2905295 C A 0.0 FAIL NS=192 - - Annotating both the ID and INFO fields from dbSnp137.vcf :: Input VCF: @@ -90,12 +81,8 @@ 22 16346045 rs56234788 T C 0.0 FAIL NS=244;RSPOS=16346045;GMAF=0.162248628884826;dbSNPBuildID=129;SSR=0;SAO=0;VP=050100000000000100000100;WGT=0;VC=SNV;SLO;GNO 22 16350245 rs2905295 C A 0.0 FAIL NS=192;RSPOS=16350245;GMAF=0.230804387568556;dbSNPBuildID=101;SSR=1;SAO=0;VP=050000000000000100000140;WGT=0;VC=SNV;GNO - @EXTERNAL_DOCUMENTATION@ - http://snpeff.sourceforge.net/SnpSift.html#annotate - -]]> - </help> +- http://snpeff.sourceforge.net/SnpSift.html#annotate + ]]></help> <expand macro="citations" /> </tool> -
--- a/snpSift_caseControl.xml Mon Dec 05 12:11:18 2016 -0500 +++ b/snpSift_caseControl.xml Mon Jun 12 10:25:32 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="snpSift_caseControl" name="SnpSift CaseControl" version="@WRAPPER_VERSION@.1"> +<tool id="snpSift_caseControl" name="SnpSift CaseControl" version="@WRAPPER_VERSION@.0"> <description>Count samples are in 'case' and 'control' groups.</description> <!-- You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory) @@ -10,44 +10,42 @@ <expand macro="stdio" /> <expand macro="version_command" /> <command><![CDATA[ - @CONDA_SNPSIFT_JAR_PATH@ && - java -Xmx1G -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" caseControl -q - #if str($name).strip() != '': - -name "$name" - #end if - #if $ctrl.ctrl_src == 'caseString': - '$ctrl.caseControlStr' - #else - -tfam "$ctrl.tfam" - #end if - "$input" > "$output" -]]> - </command> +SnpSift -Xmx1G caseControl -q +#if $name.strip(): + -name '$name' +#end if +#if $ctrl.ctrl_src == 'caseString': + '$ctrl.caseControlStr' +#else + -tfam '$ctrl.tfam' +#end if +'$input' > '$output' + ]]></command> <inputs> - <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> + <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/> <conditional name="ctrl"> <param name="ctrl_src" type="select" label="Case Control defined in"> - <option value="caseString">Case Control String</option> - <option value="tfam">TFAM file</option> - </param> - <when value="caseString"> - <param name="caseControlStr" type="text" label="Case / Control column designation" size="50"> - <help> - Case and control are defined by a string containing plus and minus symbols {'+', '-', '0'} where '+' is case, '-' is control and '0' is neutral - </help> - <validator type="regex" message="must be only plus(+), minus(-), or zero(0) characters">[+-0]+</validator> + <option value="caseString">Case Control String</option> + <option value="tfam">TFAM 
file</option> </param> - </when> - <when value="tfam"> - <param format="tabular" name="tfam" type="data" label="PLINK TFAM file" help="Read more about TFAM at http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#tr"/> - </when> + <when value="caseString"> + <param name="caseControlStr" type="text" size="50" label="Case / Control column designation"> + <help> + Case and control are defined by a string containing plus and minus symbols {'+', '-', '0'} where '+' is case, '-' is control and '0' is neutral + </help> + <validator type="regex" message="must be only plus(+), minus(-), or zero(0) characters">[+-0]+</validator> + </param> + </when> + <when value="tfam"> + <param name="tfam" type="data" format="tabular" label="PLINK TFAM file" help="Read more about TFAM at http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#tr"/> + </when> </conditional> - <param name="name" type="text" label="name" help="name to append to the 'Cases' or 'Controls' tags"> + <param name="name" type="text" label="Name" help="Name to append to the 'Cases' or 'Controls' tags"> <validator type="regex" message="Use only valid ID characters">[_a-zA-Z0-9]*</validator> </param> </inputs> <outputs> - <data format="vcf" name="output" /> + <data name="output" format="vcf" /> </outputs> <tests> <test> @@ -88,7 +86,6 @@ </test> </tests> <help><![CDATA[ - **SnpSift CaseControl** Allows you to count how many samples are in 'case' group and a 'control' group. You can count 'homozygous', 'heterozygous' or 'any' variants. @@ -97,29 +94,23 @@ This command adds two annotations to the VCF file: - - **CaseControl**: Two comma separated numbers numbers representing the number of samples that have the variant in the case and the control group. Example: +- **CaseControl**: Two comma separated numbers numbers representing the number of samples that have the variant in the case and the control group. 
Example:: - "CaseControl=3,4" *the variant is present in 3 cases and 4 controls.* + "CaseControl=3,4" *the variant is present in 3 cases and 4 controls.* - - **CaseControlP**: A p-value (Fisher exact test) that the number of cases is N or more. Example: +- **CaseControlP**: A p-value (Fisher exact test) that the number of cases is N or more. Example:: - "CaseControl=4,0;CaseControlP=3.030303e-02" *in this case the pValue of having 4 or more cases and zero controls is 0.03* - + "CaseControl=4,0;CaseControlP=3.030303e-02" *in this case the pValue of having 4 or more cases and zero controls is 0.03* For example, if we have ten samples (which means ten genotype columns in the VCF file), the first four are 'case' and the last six are 'control', so the description string would be "++++------". Let's say we want to distinguish genotypes that are homozygous in 'case' and either homozygous or heterozygous in 'control'. We would set: - - Hom/Het case = "hom" - - - Hom/Het control = "any" - - - Case / Control column designation = ""++++------" - +- Hom/Het case = "hom" +- Hom/Het control = "any" +- Case / Control column designation = ""++++------" @EXTERNAL_DOCUMENTATION@ - http://snpeff.sourceforge.net/SnpSift.html#casecontrol - -]]> - </help> +- http://snpeff.sourceforge.net/SnpSift.html#casecontrol + ]]></help> <expand macro="citations" /> </tool>
--- a/snpSift_extractFields.xml Mon Dec 05 12:11:18 2016 -0500 +++ b/snpSift_extractFields.xml Mon Jun 12 10:25:32 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.1"> +<tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.0"> <options sanitize="False" /> <description>from a VCF file into a tabular file</description> <macros> @@ -8,32 +8,31 @@ <expand macro="stdio" /> <expand macro="version_command" /> <command><![CDATA[ - @CONDA_SNPSIFT_JAR_PATH@ && - cat "$input" - #if $one_effect_per_line: - | "\$SNPSIFT_JAR_PATH/scripts/vcfEffOnePerLine.pl" - #end if - | java -Xmx6G -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" extractFields - #if $separator: - -s '$separator' - #end if - #if $empty_text: - -e '$empty_text' - #end if - - - #echo ' '.join(['"%s"' % x for x in $extract.split()]) - > "$output" -]]> - </command> +@CONDA_SNPSIFT_JAR_PATH@ && +cat '$input' +#if $one_effect_per_line: + | "\$SNPSIFT_JAR_PATH/scripts/vcfEffOnePerLine.pl" +#end if +| SnpSift -Xmx6G extractFields +#if $separator: + -s '$separator' +#end if +#if $empty_text: + -e '$empty_text' +#end if +- +#echo ' '.join(['"%s"' % x for x in $extract.split()]) +> '$output' + ]]></command> <inputs> - <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> + <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/> <param name="extract" type="text" label="Extract" help="Need help? See below a few examples." /> <param name="one_effect_per_line" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="One effect per line" help="When variants have more than one effect, lists one effect per line, while all other parameters in the line are repeated across mutiple lines" /> <param name="separator" type="text" value="" label="multiple field separator" help="Separate multiple fields in one column with this character, e.g. 
a comma, rather than a column for each of the multiple values" /> <param name="empty_text" type="text" value="" label="empty field text" help="Represent empty fields with this value, rather than leaving them blank" /> </inputs> <outputs> - <data format="tabular" name="output" /> + <data name="output" format="tabular" /> </outputs> <tests> <test> @@ -57,19 +56,15 @@ </assert_contents> </output> </test> - </tests> <help><![CDATA[ - **SnpSift Extract Fields** Extract fields from a VCF file to a TXT, tab separated format, that you can easily load in R, XLS, etc. http://snpeff.sourceforge.net/SnpSift.html#Extract -You can also use sub-fields and genotype fields / sub-fields such as: - - :: +You can also use sub-fields and genotype fields / sub-fields such as:: Standard VCF fields: CHROM @@ -127,93 +122,77 @@ "NMD[*].NUMTR" "NMD[*].PERC" - Some examples: - - *Extracting chromosome, position, ID and allele frequency from a VCF file:* - - **CHROM POS ID AF** +- *Extracting chromosome, position, ID and allele frequency from a VCF file*: - The result will look something like: + **CHROM POS ID AF** - :: + The result will look something like:: - #CHROM POS ID AF - 1 69134 0.086 - 1 69496 rs150690004 0.001 - + #CHROM POS ID AF + 1 69134 0.086 + 1 69496 rs150690004 0.001 - - *Extracting genotype fields:* +- *Extracting genotype fields*: - **CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT** + **CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT** - This means to extract: + This means to extract: - - CHROM POS ID: regular fields (as in the previous example) - - THETA : This one is from INFO - - GEN[0].GL[1] : Second likelihood from first genotype - - GEN[1].GL : The whole GL fiels (all entries without separating them) - - GEN[3].GL[*] : All likelihoods form genotype 3 (this time they will be tab separated, as opposed to the previous one). - - GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated). 
- - The result will look something like: + - CHROM POS ID: regular fields (as in the previous example) + - THETA : This one is from INFO + - GEN[0].GL[1] : Second likelihood from first genotype + - GEN[1].GL : The whole GL fiels (all entries without separating them) + - GEN[3].GL[*] : All likelihoods form genotype 3 (this time they will be tab separated, as opposed to the previous one). + - GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated). - :: + The result will look something like:: - #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT - 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1 - 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 - 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0 + #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT + 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1 + 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 + 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0 - - *Extracting fields with multiple values:* - (notice that there are multiple effect columns per line because there are mutiple effects per variant) - - **CHROM POS REF ALT ANN[*].EFFECT** +- *Extracting fields with multiple values*: + (notice that there are multiple effect columns per line because there are mutiple effects per variant) - The result will look something like: + **CHROM POS REF ALT ANN[*].EFFECT** - :: + The result will look something like:: - #CHROM POS REF ALT ANN[*].EFFECT - 22 17071756 T C 3_prime_UTR_variant downstream_gene_variant - 22 17072035 C T missense_variant downstream_gene_variant - 22 17072258 C A missense_variant downstream_gene_variant + #CHROM POS REF ALT ANN[*].EFFECT + 
22 17071756 T C 3_prime_UTR_variant downstream_gene_variant + 22 17072035 C T missense_variant downstream_gene_variant + 22 17072258 C A missense_variant downstream_gene_variant - - *Extracting fields with multiple values using a comma as a multipe field separator:* - - **CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P** +- *Extracting fields with multiple values using a comma as a multipe field separator:* - The result will look something like: + **CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P** - :: + The result will look something like:: - #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P - 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,. - 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,. - 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,. + #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P + 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,. + 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,. + 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,. 
+- *Extracting fields with multiple values, one effect per line:* - - *Extracting fields with multiple values, one effect per line:* - - **CHROM POS REF ALT ANN[*].EFFECT** + **CHROM POS REF ALT ANN[*].EFFECT** - The result will look something like: - - :: + The result will look something like:: - #CHROM POS REF ALT ANN[*].EFFECT - 22 17071756 T C 3_prime_UTR_variant - 22 17071756 T C downstream_gene_variant - 22 17072035 C T missense_variant - 22 17072035 C T downstream_gene_variant - 22 17072258 C A missense_variant - 22 17072258 C A downstream_gene_variant - + #CHROM POS REF ALT ANN[*].EFFECT + 22 17071756 T C 3_prime_UTR_variant + 22 17071756 T C downstream_gene_variant + 22 17072035 C T missense_variant + 22 17072035 C T downstream_gene_variant + 22 17072258 C A missense_variant + 22 17072258 C A downstream_gene_variant @EXTERNAL_DOCUMENTATION@ - http://snpeff.sourceforge.net/SnpSift.html#Extract - -]]> - </help> +- http://snpeff.sourceforge.net/SnpSift.html#Extract + ]]></help> <expand macro="citations" /> </tool>
--- a/snpSift_filter.xml Mon Dec 05 12:11:18 2016 -0500 +++ b/snpSift_filter.xml Mon Jun 12 10:25:32 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="snpSift_filter" name="SnpSift Filter" version="@WRAPPER_VERSION@.1"> +<tool id="snpSift_filter" name="SnpSift Filter" version="@WRAPPER_VERSION@.0"> <description>Filter variants using arbitrary expressions</description> <macros> <import>snpSift_macros.xml</import> @@ -6,35 +6,32 @@ <expand macro="requirements" /> <expand macro="stdio" /> <expand macro="version_command" /> - <command><![CDATA[ - @CONDA_SNPSIFT_JAR_PATH@ && - java -Xmx6G -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" filter -f "$input" -e "$exprFile" $inverse - #if $filtering.mode == 'field': - #if $filtering.replace.pass: - --pass - #if $filtering.replace.filterId and len(str($filtering.replace.filterId).strip()) > 0: - --filterId "$filtering.replace.filterId" - #end if - #end if - #if $filtering.addFilter and len(str($filtering.addFilter).strip()) > 0: - --addFilter "$filtering.addFilter" - #end if - #if $filtering.rmFilter and len(str($filtering.rmFilter).strip()) > 0: - --rmFilter "$filtering.rmFilter" - #end if +SnpSift -Xmx6G filter -f '$input' -e '$exprFile' $inverse +#if $filtering.mode == 'field': + #if $filtering.replace.pass: + --pass + #if $filtering.replace.filterId.strip(): + --filterId '$filtering.replace.filterId' #end if - > "$output" -]]> - </command> + #end if + #if $filtering.addFilter.strip(): + --addFilter '$filtering.addFilter' + #end if + #if $filtering.rmFilter.strip(): + --rmFilter '$filtering.rmFilter' + #end if +#end if +> '$output' + ]]></command> <configfiles> <configfile name="exprFile"> $expr#slurp </configfile> </configfiles> <inputs> - <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> - <param name="expr" type="text" label="Filter criteria" help="Need help? See below a few examples." 
> + <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/> + <param name="expr" type="text" label="Filter criteria" help="Need help? See below a few examples"> <sanitizer sanitize="False"/> </param> <param name="inverse" type="boolean" truevalue="--inverse" falsevalue="" checked="false" label="Inverse filter" help="Show lines that do not match filter expression" /> @@ -47,20 +44,20 @@ <when value="field"> <conditional name="replace"> <param name="pass" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Set matching entry FILTER to 'PASS'" - help="appends an ID tag to non-matching entry FILTER "/> + help="appends an ID tag to non-matching entry FILTER" /> <when value="no"/> <when value="yes"> - <param name="filterId" type="text" value="" label="ID appended to non-matching (##FILTER tag in header and FILTER VCF field)." help="Default ID is 'SnpSift'"/> + <param name="filterId" type="text" value="" label="ID appended to non-matching (##FILTER tag in header and FILTER VCF field)" help="Default ID is 'SnpSift'"/> </when> </conditional> - <param name="addFilter" type="text" value="" label="Add a string to FILTER VCF field if 'expression' is true." /> - <param name="rmFilter" type="text" value="" label="Remove a string from FILTER VCF field if 'expression' is true (and 'str' is in the field)." /> + <param name="addFilter" type="text" value="" label="Add a string to FILTER VCF field if 'expression' is true" /> + <param name="rmFilter" type="text" value="" label="Remove a string from FILTER VCF field if 'expression' is true (and 'str' is in the field)" /> </when> </conditional> </inputs> <outputs> - <data format="vcf" name="output" /> + <data name="output" format="vcf" /> </outputs> <tests> <test> @@ -116,80 +113,57 @@ </test> </tests> <help><![CDATA[ - **SnpSift filter** You can filter a VCF file using arbitrary expressions, for instance "(QUAL > 30) | (exists INDEL) | ( countHet() > 2 )". 
The actual expressions can be quite complex, so it allows for a lot of flexibility. Some examples: - - *I want just the variants from the second million bases of chr1*: +- *I want just the variants from the second million bases of chr1*:: - :: - - ( CHROM = 'chr1' ) & ( POS > 1000000 ) & ( POS < 2000000 ) + ( CHROM = 'chr1' ) & ( POS > 1000000 ) & ( POS < 2000000 ) - - *Filter value is either 'PASS' or it is missing*: - - :: +- *Filter value is either 'PASS' or it is missing*:: - (FILTER = 'PASS') | ( na FILTER ) - - - *I want to filter lines with an ANN annotation EFFECT of 'frameshift_variant' ( for vcf files using Sequence Ontology terms )*: + (FILTER = 'PASS') | ( na FILTER ) - :: +- *I want to filter lines with an ANN annotation EFFECT of 'frameshift_variant' ( for vcf files using Sequence Ontology terms )*:: - ( ANN[*].EFFECT has 'frameshift_variant' ) - - **Important** According to the specification, there can be more than one EFFECT separated by & (e.g. 'missense_variant&splice_region_variant'), thus using the has operator is better than using the equality operator (=). For instance 'missense_variant&splice_region_variant' = 'missense_variant' is false, whereas 'missense_variant&splice_region_variant' has 'missense_variant' is true. + ( ANN[*].EFFECT has 'frameshift_variant' ) + **Important** According to the specification, there can be more than one EFFECT separated by & (e.g. 'missense_variant&splice_region_variant'), thus using the has operator is better than using the equality operator (=). For instance 'missense_variant&splice_region_variant' = 'missense_variant' is false, whereas 'missense_variant&splice_region_variant' has 'missense_variant' is true. 
- ( EFF[*].EFFECT = 'FRAME_SHIFT' ) +- *I want to filter lines with an EFF of 'FRAME_SHIFT' ( for vcf files using Classic Effect names )*:: - - *I want to filter out samples with quality less than 30*: + ( EFF[*].EFFECT = 'FRAME_SHIFT' ) - :: +- *I want to filter out samples with quality less than 30*:: - ( QUAL > 30 ) - - - *...but we also want InDels that have quality 20 or more*: + ( QUAL > 30 ) - :: - - (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 ) +- *...but we also want InDels that have quality 20 or more*:: - - *...or any homozygous variant present in more than 3 samples*: + (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 ) - :: +- *...or any homozygous variant present in more than 3 samples*:: - (countHom() > 3) | (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 ) + (countHom() > 3) | (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 ) - - *...or any heterozygous sample with coverage 25 or more*: - - :: +- *...or any heterozygous sample with coverage 25 or more*:: - ((countHet() > 0) & (DP >= 25)) | (countHom() > 3) | (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 ) - - - *I want to keep samples where the genotype for the first sample is homozygous variant and the genotype for the second sample is reference*: + ((countHet() > 0) & (DP >= 25)) | (countHom() > 3) | (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 ) - :: +- *I want to keep samples where the genotype for the first sample is homozygous variant and the genotype for the second sample is reference*:: - (isHom( GEN[0] ) & isVariant( GEN[0] ) & isRef( GEN[1] )) - + (isHom( GEN[0] ) & isVariant( GEN[0] ) & isRef( GEN[1] )) **For information regarding HGVS and Sequence Ontology terms versus classic names**: - - http://snpeff.sourceforge.net/SnpEff_manual.html#cmdline for the options: -classic, -hgvs, and -sequenceOntology - - http://snpeff.sourceforge.net/SnpEff_manual.html#input for the table containing the classic name and sequence onology term for each effect - +- 
http://snpeff.sourceforge.net/SnpEff_manual.html#cmdline for the options: -classic, -hgvs, and -sequenceOntology +- http://snpeff.sourceforge.net/SnpEff_manual.html#input for the table containing the classic name and sequence onology term for each effect @EXTERNAL_DOCUMENTATION@ - http://snpeff.sourceforge.net/SnpSift.html#filter - -]]> - </help> +- http://snpeff.sourceforge.net/SnpSift.html#filter + ]]></help> <expand macro="citations" /> </tool>
--- a/snpSift_int.xml Mon Dec 05 12:11:18 2016 -0500 +++ b/snpSift_int.xml Mon Jun 12 10:25:32 2017 -0400 @@ -1,6 +1,6 @@ -<tool id="snpSift_int" name="SnpSift Intervals" version="@WRAPPER_VERSION@.1"> +<tool id="snpSift_int" name="SnpSift Intervals" version="@WRAPPER_VERSION@.0"> <description>Filter variants using intervals</description> - <!-- + <!-- You can change the amount of memory used, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory) --> <macros> @@ -10,18 +10,20 @@ <expand macro="stdio" /> <expand macro="version_command" /> <command><![CDATA[ - @CONDA_SNPSIFT_JAR_PATH@ && - java -Xmx2G -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" intervals -i "$input" $exclude "$bedFile" > "$output" -]]> - </command> +SnpSift -Xmx2G intervals +-i '$input' +$exclude +'$bedFile' +> '$output' + ]]></command> <inputs> - <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> - <param format="bed" name="bedFile" type="data" label="Intervals (BED file)"/> - <param name="exclude" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Exclude Intervals" + <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/> + <param name="bedFile" type="data" format="bed" label="Intervals (BED file)"/> + <param name="exclude" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Exclude Intervals" help="Filter out (exclude) VCF entries that match any interval in the BED files"/> </inputs> <outputs> - <data format="vcf" name="output" /> + <data name="output" format="vcf" /> </outputs> <tests> <test> @@ -48,13 +50,10 @@ </test> </tests> <help><![CDATA[ - You can filter using intervals (BED file). @EXTERNAL_DOCUMENTATION@ - http://snpeff.sourceforge.net/SnpSift.html#intervals - -]]> - </help> +- http://snpeff.sourceforge.net/SnpSift.html#intervals + ]]></help> <expand macro="citations" /> </tool>
--- a/snpSift_macros.xml Mon Dec 05 12:11:18 2016 -0500 +++ b/snpSift_macros.xml Mon Jun 12 10:25:32 2017 -0400 @@ -1,33 +1,31 @@ <macros> <xml name="requirements"> <requirements> - <requirement type="package" version="4.1">snpsift</requirement> + <requirement type="package" version="4.3.1p">snpsift</requirement> </requirements> </xml> - <xml name="stdio"> - <stdio> - <exit_code range=":-1" level="fatal" description="Error: Cannot open file" /> - <exit_code range="1:" level="fatal" description="Error" /> - </stdio> - </xml> - <token name="@CONDA_SNPSIFT_JAR_PATH@">if [ -z "\$SNPSIFT_JAR_PATH" ]; then export SNPSIFT_JAR_PATH=\$(dirname \$(readlink -e \$(which SnpSift))); fi</token> - <xml name="version_command"> - <version_command><![CDATA[ - if [ -z "$SNPSIFT_JAR_PATH" ]; then export SNPSIFT_JAR_PATH=$(dirname $(readlink -e $(which SnpSift))); fi && - java -jar "$SNPSIFT_JAR_PATH/SnpSift.jar" dbnsfp 2>&1|head -n 1 - ]]></version_command> - </xml> - <token name="@WRAPPER_VERSION@">4.1</token> - <token name="@EXTERNAL_DOCUMENTATION@"> + <xml name="stdio"> + <stdio> + <exit_code range=":-1" level="fatal" description="Error: Cannot open file" /> + <exit_code range="1:" level="fatal" description="Error" /> + </stdio> + </xml> + <token name="@CONDA_SNPSIFT_JAR_PATH@">if [ -z "\$SNPSIFT_JAR_PATH" ]; then export SNPSIFT_JAR_PATH=\$(dirname \$(readlink -e \$(which SnpSift))); fi</token> + <xml name="version_command"> + <version_command><![CDATA[ +SnpSift dbnsfp 2>&1|head -n 1 + ]]></version_command> + </xml> + <token name="@WRAPPER_VERSION@">4.3</token> + <token name="@EXTERNAL_DOCUMENTATION@"> +For details about this tool, please go to: -For details about this tool, please go to: - http://snpeff.sourceforge.net/SnpEff_manual.html - - </token> - <xml name="citations"> - <citations> - <citation type="doi">10.3389/fgene.2012.00035</citation> - <yield /> - </citations> - </xml> +- http://snpeff.sourceforge.net/SnpEff_manual.html + </token> + <xml name="citations"> + 
<citations> + <citation type="doi">10.3389/fgene.2012.00035</citation> + <yield /> + </citations> + </xml> </macros>
--- a/snpSift_rmInfo.xml Mon Dec 05 12:11:18 2016 -0500 +++ b/snpSift_rmInfo.xml Mon Jun 12 10:25:32 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="snpSift_rmInfo" name="SnpSift rmInfo" version="@WRAPPER_VERSION@.1"> +<tool id="snpSift_rmInfo" name="SnpSift rmInfo" version="@WRAPPER_VERSION@.0"> <description>remove INFO field annotations</description> <macros> <import>snpSift_macros.xml</import> @@ -7,21 +7,20 @@ <expand macro="stdio" /> <expand macro="version_command" /> <command><![CDATA[ - @CONDA_SNPSIFT_JAR_PATH@ && - java -Xmx2G -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" rmInfo "$input" - #echo ' '.join($info_fields.split(',')) - > "$output" -]]> - </command> +SnpSift -Xmx2G rmInfo +'$input' +#echo ' '.join($info_fields.split(',')) +> '$output' + ]]></command> <inputs> - <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> + <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/> <param name="info_fields" type="text" value="" label="Info fields to remove, e.g. EFF"> <help>Separate multiple INFO fields with a comma, e.g.: EFF,DP</help> <validator type="empty_field" /> </param> </inputs> <outputs> - <data format="vcf" name="output" /> + <data name="output" format="vcf" /> </outputs> <tests> <test> @@ -38,15 +37,13 @@ <help><![CDATA[ This command removes INFO fields from a VCF file (i.e. removes annotations) -Removing INFO fields is usually done because you want to re-annotate the VCF file, thus removing old INFO fields in order to add new ones later. +Removing INFO fields is usually done because you want to re-annotate the VCF file, thus removing old INFO fields in order to add new ones later. -SnpEff & SnpSift only add annotations and do not change current ones. So, in order to re-annotate a file, you should first remove the old annotations and then re-annotate. -The reason for this behavior is simply because replacing annotation values is considered a bad practice. 
Imagine that you have a VCF entry in your re-annotated file having the value "AA=1": How do you know if this is from the old annotations or from the new ones? This confusion often leads to problems in downstream steps of your pipelines, so it's better to avoid the problem by first removing all the previous annotations and then adding the new ones. +SnpEff & SnpSift only add annotations and do not change current ones. So, in order to re-annotate a file, you should first remove the old annotations and then re-annotate. +The reason for this behavior is simply because replacing annotation values is considered a bad practice. Imagine that you have a VCF entry in your re-annotated file having the value "AA=1": How do you know if this is from the old annotations or from the new ones? This confusion often leads to problems in downstream steps of your pipelines, so it's better to avoid the problem by first removing all the previous annotations and then adding the new ones. @EXTERNAL_DOCUMENTATION@ - http://snpeff.sourceforge.net/SnpSift.html#rmInfo - -]]> - </help> +- http://snpeff.sourceforge.net/SnpSift.html#rmInfo + ]]></help> <expand macro="citations" /> </tool>
--- a/snpSift_vartype.xml Mon Dec 05 12:11:18 2016 -0500 +++ b/snpSift_vartype.xml Mon Jun 12 10:25:32 2017 -0400 @@ -7,16 +7,17 @@ <expand macro="stdio" /> <expand macro="version_command" /> <command><![CDATA[ - @CONDA_SNPSIFT_JAR_PATH@ && - java -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" varType "$input" 2> "$log" > "$output" -]]> - </command> +SnpSift varType +'$input' +2> '$log' +> '$output' + ]]></command> <inputs> - <param format="vcf" name="input" type="data" label="Variant file (VCF)"/> + <param name="input" type="data" format="vcf" label="Variant file (VCF)"/> </inputs> <outputs> - <data format="vcf" name="output" label="${tool.name} on ${on_string}: VCF" /> - <data format="txt" name="log" label="${tool.name} on ${on_string}: log" /> + <data name="output" format="vcf" label="${tool.name} on ${on_string}: VCF" /> + <data name="log" format="txt" label="${tool.name} on ${on_string}: log" /> </outputs> <tests> </tests> @@ -35,7 +36,6 @@ .. _CRS4 Srl.: http://www.crs4.it/ .. _MIT license: https://opensource.org/licenses/MIT -]]> - </help> + ]]></help> <expand macro="citations" /> </tool>
--- a/snpSift_vcfCheck.xml Mon Dec 05 12:11:18 2016 -0500 +++ b/snpSift_vcfCheck.xml Mon Jun 12 10:25:32 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="snpSift_vcfCheck" name="SnpSift vcfCheck" version="@WRAPPER_VERSION@.1"> +<tool id="snpSift_vcfCheck" name="SnpSift vcfCheck" version="@WRAPPER_VERSION@.0"> <description>basic checks for VCF specification compliance</description> <macros> <import>snpSift_macros.xml</import> @@ -7,18 +7,18 @@ <expand macro="stdio" /> <expand macro="version_command" /> <command><![CDATA[ - @CONDA_SNPSIFT_JAR_PATH@ && - java -Xmx2G -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" vcfCheck "$input" > "$output" -]]> - </command> +SnpSift -Xmx2G vcfCheck +'$input' +> '$output' + ]]></command> <inputs> - <param format="vcf" name="input" type="data" label="Variant input file in VCF format to check"/> + <param name="input" type="data" format="vcf" label="Variant input file in VCF format to check"/> </inputs> <outputs> - <data format="vcf" name="output" /> + <data name="output" format="vcf" /> </outputs> <tests> - <test> + <test expect_exit_code="0"> <param name="input" ftype="vcf" value="test_bad.vcf"/> <assert_stderr> <has_text text="Errors" /> @@ -31,8 +31,7 @@ SnpSift vcfCheck checks for some common problems where VCF files are not following the specification. Given that many common VCF problems cause analysis tools and pipelines to behave unexpectedly, this command is intended as a simple debugging tool. @EXTERNAL_DOCUMENTATION@ - http://snpeff.sourceforge.net/SnpSift.html#vcfCheck -]]> - </help> +- http://snpeff.sourceforge.net/SnpSift.html#vcfCheck + ]]></help> <expand macro="citations" /> </tool>