Next changeset 1:b3fe10e4b93f (2015-12-11) |
Commit message:
Uploaded |
added:
LICENSE filter_vcf.py filter_vcf.sh filter_vcf.xml phe/__init__.py phe/metadata/__init__.py phe/variant/GATKVariantCaller.py phe/variant/MPileupVariantCaller.py phe/variant/__init__.py phe/variant/variant_factory.py phe/variant_filters/ADFilter.py phe/variant_filters/DP4Filter.py phe/variant_filters/DepthFilter.py phe/variant_filters/GQFilter.py phe/variant_filters/GTFilter.py phe/variant_filters/MQ0FFilter.py phe/variant_filters/MQ0Filter.py phe/variant_filters/MQFilter.py phe/variant_filters/QualFilter.py phe/variant_filters/__init__.py test-data/test_input.vcf test-data/test_output.vcf tool_dependencies.xml |
b |
diff -r 000000000000 -r 834a312c0114 LICENSE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE Thu Dec 10 09:22:39 2015 -0500 |
b |
b'@@ -0,0 +1,676 @@\n+\n+\n+ GNU GENERAL PUBLIC LICENSE\n+ Version 3, 29 June 2007\n+\n+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>\n+ Everyone is permitted to copy and distribute verbatim copies\n+ of this license document, but changing it is not allowed.\n+\n+ Preamble\n+\n+ The GNU General Public License is a free, copyleft license for\n+software and other kinds of works.\n+\n+ The licenses for most software and other practical works are designed\n+to take away your freedom to share and change the works. By contrast,\n+the GNU General Public License is intended to guarantee your freedom to\n+share and change all versions of a program--to make sure it remains free\n+software for all its users. We, the Free Software Foundation, use the\n+GNU General Public License for most of our software; it applies also to\n+any other work released this way by its authors. You can apply it to\n+your programs, too.\n+\n+ When we speak of free software, we are referring to freedom, not\n+price. Our General Public Licenses are designed to make sure that you\n+have the freedom to distribute copies of free software (and charge for\n+them if you wish), that you receive source code or can get it if you\n+want it, that you can change the software or use pieces of it in new\n+free programs, and that you know you can do these things.\n+\n+ To protect your rights, we need to prevent others from denying you\n+these rights or asking you to surrender the rights. Therefore, you have\n+certain responsibilities if you distribute copies of the software, or if\n+you modify it: responsibilities to respect the freedom of others.\n+\n+ For example, if you distribute copies of such a program, whether\n+gratis or for a fee, you must pass on to the recipients the same\n+freedoms that you received. You must make sure that they, too, receive\n+or can get the source code. And you must show them these terms so they\n+know their rights.\n+\n+ Developers that use the GNU GPL protect your rights with two steps:\n+(1) assert copyright on the software, and (2) offer you this License\n+giving you legal permission to copy, distribute and/or modify it.\n+\n+ For the developers\' and authors\' protection, the GPL clearly explains\n+that there is no warranty for this free software. For both users\' and\n+authors\' sake, the GPL requires that modified versions be marked as\n+changed, so that their problems will not be attributed erroneously to\n+authors of previous versions.\n+\n+ Some devices are designed to deny users access to install or run\n+modified versions of the software inside them, although the manufacturer\n+can do so. This is fundamentally incompatible with the aim of\n+protecting users\' freedom to change the software. The systematic\n+pattern of such abuse occurs in the area of products for individuals to\n+use, which is precisely where it is most unacceptable. Therefore, we\n+have designed this version of the GPL to prohibit the practice for those\n+products. If such problems arise substantially in other domains, we\n+stand ready to extend this provision to those domains in future versions\n+of the GPL, as needed to protect the freedom of users.\n+\n+ Finally, every program is threatened constantly by software patents.\n+States should not allow patents to restrict development and use of\n+software on general-purpose computers, but in those that do, we wish to\n+avoid the special danger that patents applied to a free program could\n+make it effectively proprietary. To prevent this, the GPL assures that\n+patents cannot be used to render the program non-free.\n+\n+ The precise terms and conditions for copying, distribution and\n+modification follow.\n+\n+ TERMS AND CONDITIONS\n+\n+ 0. Definitions.\n+\n+ "This License" refers to version 3 of the GNU General Public License.\n+\n+ "Copyright" also means copyright-like laws that apply to other kinds of\n+works, such as semiconductor masks.\n+\n+ "The Program" refers '..b'CE OF THE PROGRAM\n+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF\n+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n+\n+ 16. Limitation of Liability.\n+\n+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS\n+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\n+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE\n+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF\n+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD\n+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),\n+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF\n+SUCH DAMAGES.\n+\n+ 17. Interpretation of Sections 15 and 16.\n+\n+ If the disclaimer of warranty and limitation of liability provided\n+above cannot be given local legal effect according to their terms,\n+reviewing courts shall apply local law that most closely approximates\n+an absolute waiver of all civil liability in connection with the\n+Program, unless a warranty or assumption of liability accompanies a\n+copy of the Program in return for a fee.\n+\n+ END OF TERMS AND CONDITIONS\n+\n+ How to Apply These Terms to Your New Programs\n+\n+ If you develop a new program, and you want it to be of the greatest\n+possible use to the public, the best way to achieve this is to make it\n+free software which everyone can redistribute and change under these terms.\n+\n+ To do so, attach the following notices to the program. It is safest\n+to attach them to the start of each source file to most effectively\n+state the exclusion of warranty; and each file should have at least\n+the "copyright" line and a pointer to where the full notice is found.\n+\n+ {one line to give the program\'s name and a brief idea of what it does.}\n+ Copyright (C) {year} {name of author}\n+\n+ This program is free software: you can redistribute it and/or modify\n+ it under the terms of the GNU General Public License as published by\n+ the Free Software Foundation, either version 3 of the License, or\n+ (at your option) any later version.\n+\n+ This program is distributed in the hope that it will be useful,\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+ GNU General Public License for more details.\n+\n+ You should have received a copy of the GNU General Public License\n+ along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+Also add information on how to contact you by electronic and paper mail.\n+\n+ If the program does terminal interaction, make it output a short\n+notice like this when it starts in an interactive mode:\n+\n+ {project} Copyright (C) {year} {fullname}\n+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w\'.\n+ This is free software, and you are welcome to redistribute it\n+ under certain conditions; type `show c\' for details.\n+\n+The hypothetical commands `show w\' and `show c\' should show the appropriate\n+parts of the General Public License. Of course, your program\'s commands\n+might be different; for a GUI interface, you would use an "about box".\n+\n+ You should also get your employer (if you work as a programmer) or school,\n+if any, to sign a "copyright disclaimer" for the program, if necessary.\n+For more information on this, and how to apply and follow the GNU GPL, see\n+<http://www.gnu.org/licenses/>.\n+\n+ The GNU General Public License does not permit incorporating your program\n+into proprietary programs. If your program is a subroutine library, you\n+may consider it more useful to permit linking proprietary applications with\n+the library. If this is what you want to do, use the GNU Lesser General\n+Public License instead of this License. But first, please read\n+<http://www.gnu.org/philosophy/why-not-lgpl.html>.\n' |
b |
diff -r 000000000000 -r 834a312c0114 filter_vcf.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_vcf.py Thu Dec 10 09:22:39 2015 -0500 |
b |
@@ -0,0 +1,34 @@ +#!/usr/bin/env python +'''Simple VCF parser using custom filters. + +Created on 6 Oct 2015 + +@author: alex +''' +import argparse + +from phe.variant import VariantSet + + +def get_args(): + + args = argparse.ArgumentParser() + + args.add_argument("--vcf", "-v", required=True, help="VCF file to (re)filter.") + args.add_argument("--filters", "-f", required=True, help="Filter(s) to apply as key:threshold pairs, separated by comma.") + args.add_argument("--output", "-o", required=True, help="Location for filtered VCF to be written.") + + return args.parse_args() + + +def main(): + args = get_args() + + var_set = VariantSet(args.vcf, filters=args.filters) + + var_set.filter_variants() + + var_set.serialise(args.output) + +if __name__ == '__main__': + exit(main()) |
b |
diff -r 000000000000 -r 834a312c0114 filter_vcf.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_vcf.sh Thu Dec 10 09:22:39 2015 -0500 |
b |
@@ -0,0 +1,14 @@ +#!/bin/bash + +OUTPUT=$1 +shift +VCF=$1 +shift +FILTERS=$@ + +FILTERS=$(echo -n $FILTERS | sed 's/ /,/g') + +CMD="filter_vcf.py --vcf $VCF --filters $FILTERS --output $OUTPUT" + +echo "CMD: "$CMD +eval $CMD |
b |
diff -r 000000000000 -r 834a312c0114 filter_vcf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_vcf.xml Thu Dec 10 09:22:39 2015 -0500 |
[ |
@@ -0,0 +1,99 @@ +<tool id="filter_vcf" name="Filter VCF" version="1.0"> + <description>filters a VCF file</description> + <requirements> + <requirement type="package" version="2.7.10">python</requirement> + <requirement type="package" version="0.6.7">pyvcf</requirement> + <requirement type="package" version="3.11">pyyaml</requirement> + </requirements> + <stdio> + <!-- Assume anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + </stdio> + <command interpreter="bash"> + filter_vcf.sh + $output + $input + #for $sf in $snp_filter: + #for $name, $param in $sf.snp_filter_type.iteritems(): + #if $name not in [ "__current_case__", "snp_filter_type_selector" ]: + ${name}:"${param}" + #end if + #end for + #end for + </command> + + <inputs> + <param name="input" type="data" format="vcf" label="VCF File to filter" /> + + <repeat name="snp_filter" title="SNP Filter" help=""> + <conditional name="snp_filter_type"> + <param name="snp_filter_type_selector" type="select" label="SNP Filter Type"> + <option value="gq_score_option">GQ score</option> + <option value="dp4_ratio_option">DP4 ratio</option> + <option value="ad_ratio_option">AD ratio</option> + <option value="mq_score_option">MQ score</option> + <option value="min_depth_option">Minimum depth</option> + <option value="uncall_gt_option">Uncall GT</option> + <option value="mq0_ratio_option">MQ0 ratio</option> + <option value="qual_score_option">Quality score</option> + <option value="mq0f_ratio_option">MQ0F ratio</option> + </param> + <when value="gq_score_option"> + <param name="gq_score" type="integer" value="0" label="Minimum GC score" help="Type integer"/> + </when> + <when value="dp4_ratio_option"> + <param name="dp4_ratio" type="float" value="0.9" label="Minimum DP4 ratio" help="Type float"/> + </when> + <when value="ad_ratio_option"> + <param name="ad_ratio" type="float" value="0.9" label="Minimum AD ratio" help="Type float"/> + </when> + <when value="mq_score_option"> + <param name="mq_score" type="integer" value="30" label="Minimum MQ score" help="Type integer"/> + </when> + <when value="min_depth_option"> + <param name="min_depth" type="integer" value="5" label="Minimum depth" help="Type integer"/> + </when> + <when value="uncall_gt_option"> + <param name="uncall_gt" type="text" value="" hidden="True"/> + </when> + <when value="mq0_ratio_option"> + <param name="mq0_ratio" type="float" value="0.05" label="Minimim MQ0 ratio" help="Type float"/> + </when> + <when value="qual_score_option"> + <param name="qual_score" type="integer" value="30" label="Minimim quality score" help="Type integer"/> + </when> + <when value="mq0f_ratio_option"> + <param name="mq0f_ratio" type="float" value="0.05" label="Minimum MQ0F ratio" help="Type float"/> + </when> + </conditional> + </repeat> + + </inputs> + + <outputs> + <data format="vcf" name="output" label="${tool.name} on ${on_string}: filtered VCF" /> + </outputs> + <tests> + <test> + <param name="input" value="test_input.vcf" ftype="vcf" /> + <param name="min_depth" value="100" /> + <output name="output" file="test_output.vcf" ftype="vcf" /> + </test> + </tests> + <help> + +usage: filter_vcf.py [-h] \-\-vcf VCF \-\-filters FILTERS \-\-output OUTPUT + +optional arguments: + +-h, \-\-help show this help message and exit + +\-\-vcf VCF, -v VCF VCF file to (re)filter. + +\-\-filters FILTERS, -f FILTERS Filter(s) to apply as key:threshold pairs, separated by comma. + +\-\-output OUTPUT, -o OUTPUT Location for filtered VCF to be written. + + </help> +</tool> |
b |
diff -r 000000000000 -r 834a312c0114 phe/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/__init__.py Thu Dec 10 09:22:39 2015 -0500 |
b |
@@ -0,0 +1,4 @@ +if __name__ == "phe": + # If this package is added as library, append extended path. + from pkgutil import extend_path + __path__ = extend_path(__path__, __name__) \ No newline at end of file |
b |
diff -r 000000000000 -r 834a312c0114 phe/metadata/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/metadata/__init__.py Thu Dec 10 09:22:39 2015 -0500 |
b |
@@ -0,0 +1,16 @@ +"""Metadata related information.""" + +import abc + +class PHEMetaData(object): + """Abstract class to provide interface for meta-data creation.""" + + __metaclass__ = abc.ABCMeta + + def __init__(self): + pass + + @abc.abstractmethod + def get_meta(self): + """Get the metadata.""" + raise NotImplementedError("get meta has not been implemented yet.") |
b |
diff -r 000000000000 -r 834a312c0114 phe/variant/GATKVariantCaller.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/variant/GATKVariantCaller.py Thu Dec 10 09:22:39 2015 -0500 |
[ |
@@ -0,0 +1,126 @@ +''' +Created on 22 Sep 2015 + +@author: alex +''' +from collections import OrderedDict +import logging +import os +import subprocess + +from phe.variant import VariantCaller + + +class GATKVariantCaller(VariantCaller): + """Implemetation of the Broad institute's variant caller.""" + + name = "gatk" + """Plain text name of the variant caller.""" + + _default_options = "--sample_ploidy 2 --genotype_likelihoods_model BOTH -rf BadCigar -out_mode EMIT_ALL_SITES -nt 1" + """Default options for the variant caller.""" + + def __init__(self, cmd_options=None): + """Constructor""" + if cmd_options is None: + cmd_options = self._default_options + + super(GATKVariantCaller, self).__init__(cmd_options=cmd_options) + + self.last_command = None + + def get_info(self, plain=False): + d = {"name": "gatk", "version": self.get_version(), "command": self.last_command} + + if plain: + result = "GATK(%(version)s): %(command)s" % d + else: + result = OrderedDict(d) + + return result + + def get_version(self): + + p = subprocess.Popen(["java", "-jar", os.environ["GATK_JAR"], "-version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + (output, _) = p.communicate() + + # last character is EOL. + version = output.split("\n")[-2] + + return version + + def make_vcf(self, *args, **kwargs): + ref = kwargs.get("ref") + bam = kwargs.get("bam") + + if kwargs.get("vcf_file") is None: + kwargs["vcf_file"] = "variants.vcf" + + opts = {"ref": os.path.abspath(ref), + "bam": os.path.abspath(bam), + "gatk_jar": os.environ["GATK_JAR"], + "all_variants_file": os.path.abspath(kwargs.get("vcf_file")), + "extra_cmd_options": self.cmd_options} + +# if not self.create_aux_files(ref): +# logging.warn("Auxiliary files were not created.") +# return False + + # Call variants + # FIXME: Sample ploidy = 2? + os.environ["GATK_JAR"] + cmd = "java -XX:+UseSerialGC -jar %(gatk_jar)s -T UnifiedGenotyper -R %(ref)s -I %(bam)s -o %(all_variants_file)s %(extra_cmd_options)s" % opts + success = os.system(cmd) + + if success != 0: + logging.warn("Calling variants returned non-zero exit status.") + return False + + self.last_command = cmd + + return True + + def create_aux_files(self, ref): + """Create auxiliary files needed for this variant. + + Tools needed: samtools and picard tools. Picard tools is a Java + library that can be defined using environment variable: PICARD_JAR + specifying path to picard.jar or PICARD_TOOLS_PATH specifying path + to the directory where separate jars are (older version before jars + were merged into a single picard.jar). + Parameters: + ----------- + ref: str + Path to the reference file. + + Returns: + -------- + bool: + True if auxiliary files were created, False otherwise. + """ + + ref_name, _ = os.path.splitext(ref) + + success = os.system("samtools faidx %s" % ref) + + if success != 0: + logging.warn("Fasta index could not be created.") + return False + + d = {"ref": ref, "ref_name": ref_name} + + if os.environ.get("PICARD_TOOLS_PATH"): + d["picard_tools_path"] = os.path.join(os.environ["PICARD_TOOLS_PATH"], "CreateSequenceDictionary.jar") + elif os.environ.get("PICARD_JAR"): + # This is used in newer version of PICARD tool where multiple + # jars were merged into a single jar file. + d["picard_tools_path"] = "%s %s" % (os.environ["PICARD_JAR"], "CreateSequenceDictionary") + else: + logging.error("Picard tools are not present in the path.") + return False + + success = os.system("java -jar %(picard_tools_path)s R=%(ref)s O=%(ref_name)s.dict" % d) + + if success != 0: + logging.warn("Dictionary for the %s reference could not be created", ref) + return False |
b |
diff -r 000000000000 -r 834a312c0114 phe/variant/MPileupVariantCaller.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/variant/MPileupVariantCaller.py Thu Dec 10 09:22:39 2015 -0500 |
[ |
@@ -0,0 +1,96 @@ +''' +Created on 22 Sep 2015 + +@author: alex +''' +from collections import OrderedDict +import logging +import os +import subprocess +import tempfile + +from phe.variant import VariantCaller + + +class MPileupVariantCaller(VariantCaller): + """Implemetation of the Broad institute's variant caller.""" + + name = "mpileup" + """Plain text name of the variant caller.""" + + _default_options = "-m -f GQ" + """Default options for the variant caller.""" + + def __init__(self, cmd_options=None): + """Constructor""" + if cmd_options is None: + cmd_options = self._default_options + + super(MPileupVariantCaller, self).__init__(cmd_options=cmd_options) + + self.last_command = None + + def get_info(self, plain=False): + d = {"name": self.name, "version": self.get_version(), "command": self.last_command} + + if plain: + result = "mpileup(%(version)s): %(command)s" % d + else: + result = OrderedDict(d) + + return result + + def get_version(self): + + p = subprocess.Popen(["samtools", "--version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + (output, _) = p.communicate() + + # first line is the version of the samtools + version = output.split("\n")[0].split(" ")[1] + + return version + + def make_vcf(self, *args, **kwargs): + ref = kwargs.get("ref") + bam = kwargs.get("bam") + + if kwargs.get("vcf_file") is None: + kwargs["vcf_file"] = "variants.vcf" + + opts = {"ref": os.path.abspath(ref), + "bam": os.path.abspath(bam), + "all_variants_file": os.path.abspath(kwargs.get("vcf_file")), + "extra_cmd_options": self.cmd_options} + + with tempfile.NamedTemporaryFile(suffix=".pileup") as tmp: + opts["pileup_file"] = tmp.name + cmd = "samtools mpileup -t DP,DV,DP4,DPR,SP -Auf %(ref)s %(bam)s | bcftools call %(extra_cmd_options)s > %(all_variants_file)s" % opts + print cmd + self.last_command = cmd + if os.system(cmd) != 0: + logging.warn("Pileup creation was not successful.") + return False + + return True + + def create_aux_files(self, ref): + """Index reference with faidx from samtools. + + Parameters: + ----------- + ref: str + Path to the reference file. + + Returns: + -------- + bool: + True if auxiliary files were created, False otherwise. + """ + + success = os.system("samtools faidx %s" % ref) + + if success != 0: + logging.warn("Fasta index could not be created.") + return False + + return True |
b |
diff -r 000000000000 -r 834a312c0114 phe/variant/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/variant/__init__.py Thu Dec 10 09:22:39 2015 -0500 |
[ |
b'@@ -0,0 +1,288 @@\n+"""Classes and methods to work with variants and such."""\n+import abc\n+#ulf\n+# from collections import OrderedDict\n+try:\n+ from collections import OrderedDict\n+except ImportError:\n+ from ordereddict import OrderedDict\n+ \n+import logging\n+import pickle\n+\n+from vcf import filters\n+import vcf\n+from vcf.parser import _Filter\n+\n+from phe.metadata import PHEMetaData\n+from phe.variant_filters import make_filters, PHEFilterBase, str_to_filters\n+\n+\n+class VCFTemplate(object):\n+ """This is a small hack class for the Template used in generating\n+ VCF file."""\n+\n+ def __init__(self, vcf_reader):\n+ self.infos = vcf_reader.infos\n+ self.formats = vcf_reader.formats\n+ self.filters = vcf_reader.filters\n+ self.alts = vcf_reader.alts\n+ self.contigs = vcf_reader.contigs\n+ self.metadata = vcf_reader.metadata\n+ self._column_headers = vcf_reader._column_headers\n+ self.samples = vcf_reader.samples\n+\n+class VariantSet(object):\n+ """A convenient representation of set of variants.\n+ TODO: Implement iterator and generator for the variant set.\n+ """\n+\n+ _reader = None\n+\n+ def __init__(self, vcf_in, filters=None):\n+ """Constructor of variant set.\n+\n+ Parameters:\n+ -----------\n+ vcf_in: str\n+ Path to the VCF file for loading information.\n+ filters: str or dict, optional\n+ Dictionary or string of the filter:threshold key value pairs.\n+ """\n+ self.vcf_in = vcf_in\n+ self._reader = vcf.Reader(filename=vcf_in)\n+ self.out_template = VCFTemplate(self._reader)\n+\n+ self.filters = []\n+ if filters is not None:\n+ if isinstance(filters, str):\n+ self.filters = str_to_filters(filters)\n+ elif isinstance(filters, dict):\n+ self.filters = make_filters(config=filters)\n+ elif isinstance(filters, list):\n+ self.filters = filters\n+ else:\n+ logging.warn("Could not create filters from %s", filters)\n+ else:\n+ reader = vcf.Reader(filename=self.vcf_in)\n+ filters = {}\n+ for filter_id in reader.filters:\n+ filters.update(PHEFilterBase.decode(filter_id))\n+\n+ if filters:\n+ self.filters = make_filters(config=filters)\n+\n+ self.variants = []\n+\n+ def filter_variants(self, keep_only_snps=True):\n+ """Create a variant """\n+\n+ if self._reader is None:\n+ # Create a reader class from input VCF.\n+ self._reader = vcf.Reader(filename=self.vcf_in)\n+\n+ # get list of existing filters.\n+ existing_filters = {}\n+ removed_filters = []\n+\n+ for filter_id in self._reader.filters:\n+ conf = PHEFilterBase.decode(filter_id)\n+ tuple(conf.keys())\n+ existing_filters.update({tuple(conf.keys()):filter_id})\n+\n+ # Add each filter we are going to use to the record.\n+ # This is needed for writing out proper #FILTER header in VCF.\n+ for record_filter in self.filters:\n+ # We know that each filter has short description method.\n+ short_doc = record_filter.short_desc()\n+ short_doc = short_doc.split(\'\\n\')[0].lstrip()\n+\n+ filter_name = PHEFilterBase.decode(record_filter.filter_name())\n+\n+ # Check if the sample has been filtered for this type of filter\n+ # in the past. If so remove is, because it is going to be refiltered.\n+ if tuple(filter_name) in existing_filters:\n+ logging.info("Removing existing filter: %s", existing_filters[tuple(filter_name)])\n+ removed_filters.append(existing_filters[tuple(filter_name)])\n+ del self._reader.filters[existing_filters[tuple(filter_name)]]\n+\n+ self._reader.filters[record_filter.filter_name()] = _Filter(record_filter.filter_name(), short_doc)\n+\n+ '..b' (default: False).\n+\n+ Returns:\n+ int:\n+ Number of records written.\n+ """\n+ written_variants = 0\n+ with open(vcf_out, "w") as out_vcf:\n+ writer = vcf.Writer(out_vcf, self.out_template)\n+ for record in self.variants:\n+\n+ if only_snps and not record.is_snp:\n+ continue\n+\n+ if only_good and record.FILTER != "PASS" or record.FILTER is None:\n+ continue\n+\n+ writer.write_record(record)\n+ written_variants += 1\n+\n+ return written_variants\n+\n+ def _write_bad_variants(self, vcf_out):\n+ """**PRIVATE:** Write only those records that **haven\'t** passed."""\n+ written_variants = 0\n+ with open(vcf_out, "w") as out_vcf:\n+ writer = vcf.Writer(out_vcf, self.out_template)\n+ for record in self.variants:\n+ if record.FILTER != "PASS" and record.FILTER is not None:\n+ writer.write_record(record)\n+ written_variants += 1\n+ return written_variants\n+\n+ def serialise(self, out_file):\n+ """Save the data in this class to a file for future use/reload.\n+\n+ Parameters:\n+ -----------\n+ out_file: str\n+ path to file where the data should be written to.\n+\n+ Returns:\n+ --------\n+ int:\n+ Number of variants written.\n+ """\n+ written_variants = 0\n+ with open(out_file, "w") as out_vcf:\n+ writer = vcf.Writer(out_vcf, self.out_template)\n+ for record in self.variants:\n+ writer.write_record(record)\n+ written_variants += 1\n+\n+ return written_variants\n+\n+ def update_filters(self, new_filters):\n+ """Update internal filters in the output template."""\n+ for new_filter, filter_data in new_filters.items():\n+ self.out_template.filters[new_filter] = filter_data\n+\n+\n+class VariantCaller(PHEMetaData):\n+ """Abstract class used for access to the implemented variant callers."""\n+\n+ __metaclass__ = abc.ABCMeta\n+\n+ def __init__(self, cmd_options=None):\n+ """Constructor for variant caller.\n+\n+ Parameters:\n+ -----------\n+ cmd_options: str, optional\n+ Command options to pass to the variant command.\n+ """\n+ self.cmd_options = cmd_options\n+\n+ super(VariantCaller, self).__init__()\n+\n+ @abc.abstractmethod\n+ def make_vcf(self, *args, **kwargs):\n+ """Create a VCF from **BAM** file.\n+\n+ Parameters:\n+ -----------\n+ ref: str\n+ Path to the reference file.\n+ bam: str\n+ Path to the indexed **BAM** file for calling variants.\n+ vcf_file: str\n+ path to the VCF file where data will be written to.\n+\n+ Returns:\n+ --------\n+ bool:\n+ True if variant calling was successful, False otherwise.\n+ """\n+ raise NotImplementedError("make_vcf is not implemented yet.")\n+\n+ @abc.abstractmethod\n+ def create_aux_files(self, ref):\n+ """Create needed (if any) auxiliary files.\n+ These files are required for proper functioning of the variant caller.\n+ """\n+ raise NotImplementedError("create_aux_files is not implemeted.")\n+\n+ @abc.abstractmethod\n+ def get_info(self, plain=False):\n+ """Get information about this variant caller."""\n+ raise NotImplementedError("Get info has not been implemented yet."\n+ )\n+ def get_meta(self):\n+ """Get the metadata about this variant caller."""\n+ od = self.get_info()\n+ od["ID"] = "VariantCaller"\n+ return OrderedDict({"PHEVariantMetaData": [od]})\n+\n+ @abc.abstractmethod\n+ def get_version(self):\n+ """Get the version of the underlying command used."""\n+ raise NotImplementedError("Get version has not been implemented yet.")\n' |
b |
diff -r 000000000000 -r 834a312c0114 phe/variant/variant_factory.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/variant/variant_factory.py Thu Dec 10 09:22:39 2015 -0500 |
[ |
@@ -0,0 +1,92 @@ +'''Classes and functions for working with variant callers. + +Created on 22 Sep 2015 + +@author: alex +''' +import glob +import inspect +import logging +import os +import sys + +from phe.variant import VariantCaller + +def dynamic_caller_loader(): + """Fancy way of dynamically importing existing variant callers. + + Returns + ------- + dict: + Available variant callers dictionary. Keys are parameters that + can be used to call variants. + """ + + # We assume the caller are in the same directory as THIS file. + variants_dir = os.path.dirname(__file__) + variants_dir = os.path.abspath(variants_dir) + + # This is populated when the module is first imported. + avail_callers = {} + + # Add this directory to the syspath. + sys.path.insert(0, variants_dir) + + # Find all "py" files. + for caller_mod in glob.glob(os.path.join(variants_dir, "*.py")): + + # Derive name of the module where caller is. + caller_mod_file = os.path.basename(caller_mod) + + # Ignore __init__ file, only base class is there. + if caller_mod_file.startswith("__init__"): + continue + + # Import the module with a caller. + mod = __import__(caller_mod_file.replace(".pyc", "").replace(".py", "")) + + # Find all the classes contained in this module. + classes = inspect.getmembers(mod, inspect.isclass) + for cls_name, cls in classes: + # For each class, if it is a sublass of VariantCaller, add it. + if cls_name != "VariantCaller" and issubclass(cls, VariantCaller): + # The name is inherited and defined within each caller. + avail_callers[cls.name] = cls + + sys.path.remove(variants_dir) + + return avail_callers + +_avail_variant_callers = dynamic_caller_loader() + +def available_callers(): + """Return list of available variant callers.""" + return _avail_variant_callers.keys() + +def factory(variant=None, custom_options=None): + """Make an instance of a variant class. + + Parameters: + ----------- + variant: str, optional + Name of the variant class to instantiate. + custom_options: str, optional + Custom options to be passed directly to the implementing class. + + Returns: + -------- + :py:class:`phe.variant.VariantCaller`: + Instance of the :py:class:`phe.variant.VariantCaller` for given + variant name, or None if one couldn't be found. + """ + if variant is not None and isinstance(variant, str): + + variant = variant.lower() + if variant in _avail_variant_callers: + return _avail_variant_callers[variant](cmd_options=custom_options) + else: + logging.error("No implementation for %s mapper.") + return None + + logging.warn("Unknown parameters. Mapper could not be initialised.") + return None |
b |
diff -r 000000000000 -r 834a312c0114 phe/variant_filters/ADFilter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/variant_filters/ADFilter.py Thu Dec 10 09:22:39 2015 -0500 |
[ |
@@ -0,0 +1,79 @@ +'''Filter VCFs on AD ratio. + +Created on 24 Sep 2015 + +@author: alex +''' + +import argparse +import logging + +from phe.variant_filters import PHEFilterBase + + +class ADFilter(PHEFilterBase): + '''Filter sites by AD ratio.''' + + + name = "ADRatio" + _default_threshold = 0.9 + parameter = "ad_ratio" + + @classmethod + def customize_parser(self, parser): + arg_name = self.parameter.replace("_", "-") + parser.add_argument("--%s" % arg_name, type=float, default=self._default_threshold, + help="Filter sites below minimum ad ratio (default: %s)" % self._default_threshold) + + def __init__(self, args): + """AD Ratio constructor.""" + # This needs to happen first, because threshold is initialised here. + super(ADFilter, self).__init__(args) + + # Change the threshold to custom dp value. + self.threshold = self._default_threshold + if isinstance(args, argparse.Namespace): + self.threshold = args.ad_ratio + elif isinstance(args, dict): + try: + self.threshold = float(args.get(self.parameter)) + except TypeError: + logging.error("Could not retrieve threshold from %s", args.get(self.parameter)) + self.threshold = None + + + def __call__(self, record): + """Filter a :py:class:`vcf.model._Record`.""" + + if not record.is_snp: + return None + + if len(record.samples) > 1: + logging.warn("More than 1 sample detected. Only first is considered.") + + try: + record_ad = record.samples[0].data.AD + + # FIXME: when record length is > 2, what do you do? + assert len(record_ad) == 2, "AD data is incomplete POS: %i" % record.POS + + depth = sum(record.samples[0].data.AD) + + ratio = float(record_ad[1]) / depth + except Exception: + logging.error("Could not calculate AD ratio from %s POS: %s", record_ad, record.POS) + ratio = None + + if ratio is None or ratio < self.threshold: + # FIXME: When ratio is None, i.e. error, what do you do? + return ratio or False + else: + return None + + def short_desc(self): + short_desc = self.__doc__ or '' + + if short_desc: + short_desc = "%s (AD ratio > %s )" % (short_desc, self.threshold) + + return short_desc |
b |
diff -r 000000000000 -r 834a312c0114 phe/variant_filters/DP4Filter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/variant_filters/DP4Filter.py Thu Dec 10 09:22:39 2015 -0500 |
[ |
@@ -0,0 +1,76 @@ +'''Filter VCFs on AD ratio. + +Created on 24 Sep 2015 + +@author: alex +''' + +import argparse +import logging + +from phe.variant_filters import PHEFilterBase + + +class DP4Filter(PHEFilterBase): + '''Filter sites by AD ratio.''' + + + name = "DP4" + _default_threshold = 0.9 + parameter = "dp4_ratio" + + @classmethod + def customize_parser(self, parser): + arg_name = self.parameter.replace("_", "-") + parser.add_argument("--%s" % arg_name, type=float, default=self._default_threshold, + help="Filter sites below minimum dp4 ratio (default: %s)" % self._default_threshold) + + def __init__(self, args): + """AD Ratio constructor.""" + # This needs to happen first, because threshold is initialised here. + super(DP4Filter, self).__init__(args) + + # Change the threshold to custom dp value. + self.threshold = self._default_threshold + if isinstance(args, argparse.Namespace): + self.threshold = args.ad_ratio + elif isinstance(args, dict): + try: + self.threshold = float(args.get(self.parameter)) + except TypeError: + logging.error("Could not retrieve threshold from %s", args.get(self.parameter)) + self.threshold = None + + + def __call__(self, record): + """Filter a :py:class:`vcf.model._Record`.""" + + if not record.is_snp: + return None + + try: + record_dp = record.INFO.get("DP4") + + # FIXME: when record length is > 2, what do you do? + assert len(record_dp) == 4, "DP4 data should have 4 datum POS: %i" % record.POS + + depth = sum(record_dp) + + ratio = float(sum(record_dp[2:])) / depth + except Exception: + logging.error("Could not calculate DP4 ratio from %s POS: %s", record_dp, record.POS) + ratio = None + + if ratio is None or ratio < self.threshold: + # FIXME: When ratio is None, i.e. error, what do you do? + return ratio or False + else: + return None + + def short_desc(self): + short_desc = self.__doc__ or '' + + if short_desc: + short_desc = "%s (DP4 ratio > %s )" % (short_desc, self.threshold) + + return short_desc |
b |
diff -r 000000000000 -r 834a312c0114 phe/variant_filters/DepthFilter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/variant_filters/DepthFilter.py Thu Dec 10 09:22:39 2015 -0500 |
[ |
@@ -0,0 +1,68 @@ +'''Filter VCF on depth of coverage. + +Created on 24 Sep 2015 + +@author: alex +''' +import argparse +import logging + +from phe.variant_filters import PHEFilterBase + + +class DepthFilter(PHEFilterBase): + """Filter sites by depth.""" + + name = "MinDepth" + _default_threshold = 5 + parameter = "min_depth" + + @classmethod + def customize_parser(self, parser): + arg_name = self.parameter.replace("_", "-") + parser.add_argument("--" % arg_name, type=int, default=self._default_threshold, + help="Filter sites below minimum depth (default: %s)" % self._default_threshold) + + def __init__(self, args): + """Min Depth constructor.""" + # This needs to happen first, because threshold is initialised here. + super(DepthFilter, self).__init__(args) + + # Change the threshold to custom dp value. + self.threshold = self._default_threshold + if isinstance(args, argparse.Namespace): + self.threshold = args.min_depth + elif isinstance(args, dict): + try: + self.threshold = int(args.get(self.parameter)) + except TypeError: + logging.error("Could not retrieve threshold from %s", args.get(self.parameter)) + self.threshold = None + + def __call__(self, record): + """Filter a :py:class:`vcf.model._Record`.""" + + if len(record.samples) > 1: + logging.warn("Currently we only filter VCFs with 1 sample. Only first sample will be used.") + + try: + record_dp = record.samples[0].data.DP + except AttributeError: + record_dp = None + + if record_dp is None: +# logging.debug("Falling back to INFO DP") + record_dp = record.INFO.get("DP") + + if record_dp is None or record_dp < self.threshold: + return record_dp or False + else: + return None + + def short_desc(self): + short_desc = self.__doc__ or '' + + if short_desc: + short_desc = "%s (DP > %i)" % (short_desc, self.threshold) + + return short_desc |
b |
diff -r 000000000000 -r 834a312c0114 phe/variant_filters/GQFilter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/variant_filters/GQFilter.py Thu Dec 10 09:22:39 2015 -0500 |
[ |
@@ -0,0 +1,69 @@ +'''Filter VCF on GQ parameter. +Created on 24 Sep 2015 + +@author: alex +''' + +import argparse +import logging + +from phe.variant_filters import PHEFilterBase + + +class GQFilter(PHEFilterBase): + '''Filter sites by GQ score.''' + + name = "MinGQ" + _default_threshold = 0 + parameter = "gq_score" + + @classmethod + def customize_parser(self, parser): + arg_name = self.parameter.replace("_", "-") + parser.add_argument("--%s" % arg_name, type=int, default=self._default_threshold, + help="Filter sites below given GQ score (default: %s)" % self._default_threshold) + + def __init__(self, args): + """Min Depth constructor.""" + # This needs to happen first, because threshold is initialised here. + super(GQFilter, self).__init__(args) + + # Change the threshold to custom gq value. + self.threshold = self._default_threshold + if isinstance(args, argparse.Namespace): + self.threshold = args.gq_score + elif isinstance(args, dict): + try: + self.threshold = int(args.get(self.parameter)) + except TypeError: + logging.error("Could not retrieve threshold from %s", args.get(self.parameter)) + self.threshold = None + + def __call__(self, record): + """Filter a :py:class:`vcf.model._Record`.""" + + if not record.is_snp: + return None + + if len(record.samples) > 1: + logging.warn("More than 1 sample detected. Only first is considered.") + + try: + record_gq = record.samples[0].data.GQ + except AttributeError: + logging.error("Could not retrieve GQ score POS %i", record.POS) + record_gq = None + + if record_gq is None or record_gq < self.threshold: + # FIXME: when record_gq is None, i,e, error, what do you do? + return record_gq or False + else: + return None + + def short_desc(self): + short_desc = self.__doc__ or '' + + if short_desc: + short_desc = "%s (GQ > %s)" % (short_desc, self.threshold) + + return short_desc |
b |
diff -r 000000000000 -r 834a312c0114 phe/variant_filters/GTFilter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/variant_filters/GTFilter.py Thu Dec 10 09:22:39 2015 -0500 |
[ |
@@ -0,0 +1,72 @@ +'''Filter VCF on GT filter parameter. + +Created on 24 Sep 2015 + +@author: alex +''' + +import argparse +import logging + +from phe.variant_filters import PHEFilterBase + + +class UncallableGTFilter(PHEFilterBase): + '''Filter uncallable genotypes''' + + name = "UncallGT" + _default_threshold = None + parameter = "uncall_gt" + + @classmethod + def customize_parser(self, parser): + arg_name = self.parameter.replace("_", "-") + parser.add_argument("--%s" % arg_name, type=str, default=self._default_threshold, + help="Filter sites below given GQ score (default: %s)" % self._default_threshold) + + def __init__(self, args): + """Min Depth constructor.""" + # This needs to happen first, because threshold is initialised here. + super(UncallableGTFilter, self).__init__(args) + + # Change the threshold to custom gq value. + self.threshold = self._default_threshold + if isinstance(args, argparse.Namespace): + self.threshold = args.gq_score + elif isinstance(args, dict): + try: + self.threshold = str(args.get(self.parameter)) + except TypeError: + logging.error("Could not retrieve threshold from %s", args.get(self.parameter)) + self.threshold = None + + def __call__(self, record): + """Filter a :py:class:`vcf.model._Record`.""" + + if len(record.samples) > 1: + logging.warn("More than 1 sample detected. Only first is considered.") + + try: + record_gt = record.samples[0].data.GT + except AttributeError: + logging.error("Could not retrieve GQ score POS %i", record.POS) + record_gt = None + + if record_gt is None: + return "./." + else: + return None + + def short_desc(self): + short_desc = self.__doc__ or '' + + if short_desc: + short_desc = "%s (GT != ./. )" % (short_desc) + + return short_desc + + def is_gap(self): + return True + + def is_n(self): + return False |
b |
diff -r 000000000000 -r 834a312c0114 phe/variant_filters/MQ0FFilter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/variant_filters/MQ0FFilter.py Thu Dec 10 09:22:39 2015 -0500 |
b |
@@ -0,0 +1,60 @@ +'''Filter VCF on MQ filter. +Created on 24 Sep 2015 + +@author: alex +''' + +import argparse +import logging + +from phe.variant_filters import PHEFilterBase + + +class MQ0Filter(PHEFilterBase): + '''Filter sites by MQ0 (Total Mapping Quality Zero Reads) to DP ratio.''' + + name = "MinMQ0F" + _default_threshold = 0.05 + parameter = "mq0f_ratio" + + @classmethod + def customize_parser(self, parser): + arg_name = self.parameter.replace("_", "-") + parser.add_argument("--%s" % arg_name, type=float, default=self._default_threshold, + help="Filter sites below given MQ0F ratio (default: %s)" % self._default_threshold) + + def __init__(self, args): + """Min Mapping Quality Zero constructor.""" + # This needs to happen first, because threshold is initialised here. + super(MQ0Filter, self).__init__(args) + + # Change the threshold to custom gq value. + self.threshold = self._default_threshold + if isinstance(args, argparse.Namespace): + self.threshold = args.mq_score + elif isinstance(args, dict): + try: + self.threshold = float(args.get(self.parameter)) + except TypeError: + logging.error("Could not retrieve threshold from %s", args.get(self.parameter)) + self.threshold = None + + def __call__(self, record): + """Filter a :py:class:`vcf.model._Record`.""" + + + record_mq = record.INFO.get("MQ0F") + + if record_mq is None or record_mq > self.threshold: + # FIXME: when record_mq is None, i,e, error/missing, what do you do? + return record_mq or False + else: + return None + + def short_desc(self): + short_desc = self.__doc__ or '' + + if short_desc: + short_desc = "%s (MQ0F > %s)" % (short_desc, self.threshold) + + return short_desc |
b |
diff -r 000000000000 -r 834a312c0114 phe/variant_filters/MQ0Filter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/variant_filters/MQ0Filter.py Thu Dec 10 09:22:39 2015 -0500 |
b |
@@ -0,0 +1,64 @@ +'''Filter VCF on MQ filter. +Created on 24 Sep 2015 + +@author: alex +''' + +import argparse +import logging + +from phe.variant_filters import PHEFilterBase + + +class MQ0Filter(PHEFilterBase): + '''Filter sites by MQ0 (Total Mapping Quality Zero Reads) to DP ratio.''' + + name = "MinMQ0" + _default_threshold = 0.05 + parameter = "mq0_ratio" + + @classmethod + def customize_parser(self, parser): + arg_name = self.parameter.replace("_", "-") + parser.add_argument("--%s" % arg_name, type=float, default=self._default_threshold, + help="Filter sites below given MQ score (default: %s)" % self._default_threshold) + + def __init__(self, args): + """Min Mapping Quality Zero constructor.""" + # This needs to happen first, because threshold is initialised here. + super(MQ0Filter, self).__init__(args) + + # Change the threshold to custom gq value. + self.threshold = self._default_threshold + if isinstance(args, argparse.Namespace): + self.threshold = args.mq_score + elif isinstance(args, dict): + try: + self.threshold = float(args.get(self.parameter)) + except TypeError: + logging.error("Could not retrieve threshold from %s", args.get(self.parameter)) + self.threshold = None + + def __call__(self, record): + """Filter a :py:class:`vcf.model._Record`.""" + + + record_mq = record.INFO.get("MQ0") + + if record_mq: + # We consider DO from INFO not samples because MQ0 is also from INFO. + record_mq /= float(record.INFO.get("DP")) + + if record_mq is None or record_mq > self.threshold: + # FIXME: when record_mq is None, i,e, error/missing, what do you do? + return record_mq or False + else: + return None + + def short_desc(self): + short_desc = self.__doc__ or '' + + if short_desc: + short_desc = "%s (MQ0 > %s)" % (short_desc, self.threshold) + + return short_desc |
b |
diff -r 000000000000 -r 834a312c0114 phe/variant_filters/MQFilter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/variant_filters/MQFilter.py Thu Dec 10 09:22:39 2015 -0500 |
b |
@@ -0,0 +1,59 @@ +'''Filter VCF on MQ filter. +Created on 24 Sep 2015 + +@author: alex +''' + +import argparse +import logging + +from phe.variant_filters import PHEFilterBase + + +class MQFilter(PHEFilterBase): + '''Filter sites by Mapping Quality (MQ) score.''' + + name = "MinMQ" + _default_threshold = 30 + parameter = "mq_score" + + @classmethod + def customize_parser(self, parser): + arg_name = self.parameter.replace("_", "-") + parser.add_argument("--%s" % arg_name, type=int, default=self._default_threshold, + help="Filter sites below given MQ score (default: %s)" % self._default_threshold) + + def __init__(self, args): + """Min Mapping Quality constructor.""" + # This needs to happen first, because threshold is initialised here. + super(MQFilter, self).__init__(args) + + # Change the threshold to custom gq value. + self.threshold = self._default_threshold + if isinstance(args, argparse.Namespace): + self.threshold = args.mq_score + elif isinstance(args, dict): + try: + self.threshold = int(args.get(self.parameter)) + except TypeError: + logging.error("Could not retrieve threshold from %s", args.get(self.parameter)) + self.threshold = None + + def __call__(self, record): + """Filter a :py:class:`vcf.model._Record`.""" + + record_mq = record.INFO.get("MQ") + + if record_mq is None or record_mq < self.threshold: + # FIXME: when record_mq is None, i,e, error/missing, what do you do? + return record_mq or False + else: + return None + + def short_desc(self): + short_desc = self.__doc__ or '' + + if short_desc: + short_desc = "%s (MQ > %s)" % (short_desc, self.threshold) + + return short_desc |
b |
diff -r 000000000000 -r 834a312c0114 phe/variant_filters/QualFilter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/variant_filters/QualFilter.py Thu Dec 10 09:22:39 2015 -0500 |
b |
@@ -0,0 +1,60 @@ +'''Filter VCF on GQ parameter. +Created on 24 Sep 2015 + +@author: alex +''' + +import argparse +import logging + +from phe.variant_filters import PHEFilterBase + + +class QualFilter(PHEFilterBase): + '''Filter sites by QUAL score.''' + + name = "LowSNPQual" + _default_threshold = 40.0 + parameter = "qual_score" + + @classmethod + def customize_parser(self, parser): + arg_name = self.parameter.replace("_", "-") + parser.add_argument("--%s" % arg_name, type=float, default=self._default_threshold, + help="Filter sites below given GQ score (default: %s)" % self._default_threshold) + + def __init__(self, args): + """Min Depth constructor.""" + # This needs to happen first, because threshold is initialised here. + super(QualFilter, self).__init__(args) + + # Change the threshold to custom gq value. + self.threshold = self._default_threshold + if isinstance(args, argparse.Namespace): + self.threshold = args.gq_score + elif isinstance(args, dict): + try: + self.threshold = float(args.get(self.parameter)) + except TypeError: + logging.error("Could not retrieve threshold from %s", args.get(self.parameter)) + self.threshold = None + + def __call__(self, record): + """Filter a :py:class:`vcf.model._Record`.""" + + + record_qual = record.QUAL + + if record_qual is None or record_qual < self.threshold: + # FIXME: when record_gq is None, i,e, error, what do you do? + return record_qual or False + else: + return None + + def short_desc(self): + short_desc = self.__doc__ or '' + + if short_desc: + short_desc = "%s (QUAL > %s)" % (short_desc, self.threshold) + + return short_desc |
b |
diff -r 000000000000 -r 834a312c0114 phe/variant_filters/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phe/variant_filters/__init__.py Thu Dec 10 09:22:39 2015 -0500 |
[ |
@@ -0,0 +1,209 @@ +"""Classes and functions for working with variant filters.""" + +from __builtin__ import __import__ +from abc import abstractproperty +import abc +import argparse +import glob +import inspect +import logging +import os +import re +import sys + +import vcf +import vcf.filters +from vcf.parser import _Filter + +IUPAC_CODES = {frozenset(["A", "G"]): "R", + frozenset(["C", "T"]): "Y", + frozenset(["G", "C"]): "S", + frozenset(["A", "T"]): "W", + frozenset(["G", "T"]): "K", + frozenset(["A", "C"]): "M", + frozenset(["C", "G", "T"]): "B", + frozenset(["A", "G", "T"]): "D", + frozenset(["A", "C", "T"]): "H", + frozenset(["A", "C", "G"]): "V" + } + +class PHEFilterBase(vcf.filters.Base): + """Base class for VCF filters.""" + __meta__ = abc.ABCMeta + + magic_sep = ":" + decoder_pattern = re.compile(magic_sep) + + @abc.abstractproperty + def parameter(self): + """Short name of parameter being filtered.""" + return self.parameter + + @abc.abstractproperty + def _default_threshold(self): + """Default threshold for filtering.""" + return self._default_threshold + + def __init__(self, args): + super(PHEFilterBase, self).__init__(args) + + # Change the threshold to custom gq value. + self.threshold = self._default_threshold + + if isinstance(args, dict): + self.threshold = args.get(self.parameter) + + def __str__(self): + return self.filter_name() + + @abc.abstractmethod + def short_desc(self): + """Short description of the filter (included in VCF).""" + raise NotImplementedError("Get short description is not implemented.") + + def get_config(self): + """This is used for reconstructing filter.""" + return {self.parameter: self.threshold} + + def filter_name(self): + """Create filter names by their parameter separated by magic. + E.g. if filter parameter is ad_ratio and threshold is 0.9 then + ad_ratio:0.9 if the filter name. + """ + return "%s%s%s" % (self.parameter, self.magic_sep, self.threshold) + + @staticmethod + def decode(filter_id): + """Decode name of filter.""" + conf = {} + + if PHEFilterBase.magic_sep in filter_id: + info = PHEFilterBase.decoder_pattern.split(filter_id) + assert len(info) == 2 + conf[info[0]] = info[1] + return conf + + def is_gap(self): + return False + + def is_n(self): + return True + + @staticmethod + def call_concensus(record): + extended_code = "N" + try: + sample_ad = set([str(c) for c in record.ALT] + [record.REF]) + + + for code, cov in IUPAC_CODES.items(): + if sample_ad == cov: + extended_code = code + break + except AttributeError: + extended_code = "N" + + return extended_code + +def dynamic_filter_loader(): + """Fancy way of dynamically importing existing filters. + + Returns + ------- + dict: + Available filters dictionary. Keys are parameters that + can be supplied to the filters. + """ + + # We assume the filters are in the same directory as THIS file. + filter_dir = os.path.dirname(__file__) + filter_dir = os.path.abspath(filter_dir) + + # This is populated when the module is first imported. + avail_filters = {} + + # Add this directory to the syspath. + sys.path.insert(0, filter_dir) + + # Find all "py" files. + for filter_mod in glob.glob(os.path.join(filter_dir, "*.py")): + + # Derive name of the module where filter is. + filter_mod_file = os.path.basename(filter_mod) + + # Ignore this file, obviously. + if filter_mod_file.startswith("__init__"): + continue + + # Import the module with a filter. + mod = __import__(filter_mod_file.replace(".pyc", "").replace(".py", "")) + + # Find all the classes contained in this module. + classes = inspect.getmembers(mod, inspect.isclass) + for cls_name, cls in classes: + # For each class, if it is a sublass of PHEFilterBase, add it. + if cls_name != "PHEFilterBase" and issubclass(cls, PHEFilterBase): + # The parameters are inherited and defined within each filter. + avail_filters[cls.parameter] = cls + + sys.path.remove(filter_dir) + + return avail_filters + +_avail_filters = dynamic_filter_loader() + +def available_filters(): + """Return list of available filters.""" + return _avail_filters.keys() + +def str_to_filters(filters): + """Convert from filter string to array of filters. + E.g. ad_ration:0.9,min_depth:5 + + Parameters: + ----------- + filters: str + String version of filters, separated by comma. + + Returns: + -------- + list: + List of :py:class:`phe.variant_filters.PHEFilterBase` instances. + """ + + config = {} + for kv_pair in filters.split(","): + pair = kv_pair.split(":") + assert len(pair) == 2, "Filters should be separated by ':' %s" % kv_pair + + # We don't care about casting them to correct type because Filters + # will do it for us. + config[pair[0]] = pair[1] + + return make_filters(config) + +def make_filters(config): + """Create a list of filters from *config*. + + Parameters: + ----------- + config: dict, optional + Dictionary with parameter: value pairs. For each parameter, an + appropriate Filter will be found and instanciated. + + Returns: + -------- + list: + List of :py:class:`PHEFilterBase` filters. + """ + filters = [] + + if config: + for custom_filter in config: + if custom_filter in _avail_filters: + filters.append(_avail_filters[custom_filter](config)) + else: + logging.warn("Could not find appropriate filter for %s", + custom_filter) + + return filters |
b |
diff -r 000000000000 -r 834a312c0114 test-data/test_input.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_input.vcf Thu Dec 10 09:22:39 2015 -0500 |
[ |
b'@@ -0,0 +1,1000 @@\n+##fileformat=VCFv4.1\n+##GATKCommandLine=<ID=SelectVariants,Version=2.6-5-gba531bd,Date="Thu May 22 09:24:36 BST 2014",Epoch=1400747076888,CommandLineOptions="analysis_type=SelectVariants input_file=[] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/phengs/galaxy/database/tmp/tmp-gatk-lMh4vi/gatk_input.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=2 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false variant=(RodBinding name=variant source=/phengs/galaxy/database/tmp/tmp-gatk-lMh4vi/input_variant.vcf) discordance=(RodBinding name= source=UNBOUND) concordance=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sample_name=[] sample_expressions=null sample_file=null exclude_sample_name=[] exclude_sample_file=[] select_expressions=[] excludeNonVariants=false excludeFiltered=false restrictAllelesTo=ALL keepOriginalAC=false mendelianViolation=false mendelianViolationQualThreshold=0.0 select_random_fraction=0.0 remove_fraction_genotypes=0.0 selectTypeToInclude=[SNP] keepIDs=null fullyDecode=false forceGenotypesDecode=false justRead=false maxIndelSize=2147483647 ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES=false filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n+##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.6-5-gba531bd,Date="Thu May 22 09:10:24 BST 2014",Epoch=1400746224902,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/phengs/galaxy/database/tmp/tmp-gatk-x7S0pF/gatk_input_0.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/phengs/galaxy/database/tmp/tmp-gatk-x7S0pF/gatk_input.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=NONE downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=STRICT remove_program_records=false keep_program_records=false unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=2 num_cpu_threads_per_data_thread=6 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree='..b'MQ=0.0;MQ0=92\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227782\t.\tA\t.\t.\tmq_score:50\tDP=92;MQ=0.0;MQ0=92\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227783\t.\tG\t.\t.\tmq_score:50\tDP=91;MQ=0.0;MQ0=91\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227784\t.\tG\t.\t.\tmq_score:50\tDP=91;MQ=0.0;MQ0=91\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227785\t.\tT\t.\t.\tmq_score:50\tDP=91;MQ=0.0;MQ0=91\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227786\t.\tG\t.\t.\tmq_score:50\tDP=90;MQ=0.0;MQ0=90\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227787\t.\tT\t.\t.\tmq_score:50\tDP=87;MQ=0.0;MQ0=87\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227788\t.\tA\t.\t.\tmq_score:50\tDP=87;MQ=0.0;MQ0=87\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227789\t.\tG\t.\t.\tmq_score:50\tDP=88;MQ=0.0;MQ0=88\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227790\t.\tC\t.\t.\tmq_score:50\tDP=87;MQ=0.0;MQ0=87\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227791\t.\tG\t.\t.\tmq_score:50\tDP=87;MQ=0.0;MQ0=87\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227792\t.\tG\t.\t.\tmq_score:50\tDP=87;MQ=0.0;MQ0=87\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227793\t.\tT\t.\t.\tmq_score:50\tDP=84;MQ=0.0;MQ0=84\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227794\t.\tG\t.\t.\tmq_score:50\tDP=84;MQ=0.0;MQ0=84\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227795\t.\tA\t.\t.\tmq_score:50\tDP=89;MQ=0.0;MQ0=89\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227796\t.\tA\t.\t.\tmq_score:50\tDP=89;MQ=0.0;MQ0=89\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227797\t.\tA\t.\t.\tmq_score:50\tDP=92;MQ=0.0;MQ0=92\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227798\t.\tT\t.\t.\tmq_score:50\tDP=89;MQ=0.0;MQ0=89\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227799\t.\tG\t.\t.\tmq_score:50\tDP=93;MQ=0.0;MQ0=93\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227800\t.\tC\t.\t.\tmq_score:50\tDP=93;MQ=0.0;MQ0=93\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227801\t.\tG\t.\t.\tmq_score:50\tDP=92;MQ=0.0;MQ0=92\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227802\t.\tT\t.\t.\tmq_score:50\tDP=91;MQ=0.0;MQ0=91\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227803\t.\tA\t.\t.\tmq_score:50\tDP=92;MQ=0.0;MQ0=92\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227804\t.\tG\t.\t.\tmq_score:50\tDP=94;MQ=0.0;MQ0=94\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227805\t.\tA\t.\t.\tmq_score:50\tDP=96;MQ=0.0;MQ0=96\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227806\t.\tG\t.\t.\tmq_score:50\tDP=104;MQ=0.0;MQ0=104\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227807\t.\tA\t.\t.\tmq_score:50\tDP=109;MQ=0.0;MQ0=109\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227808\t.\tT\t.\t.\tmq_score:50\tDP=104;MQ=0.0;MQ0=104\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227809\t.\tC\t.\t.\tmq_score:50\tDP=104;MQ=0.0;MQ0=104\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227810\t.\tT\t.\t.\tmq_score:50\tDP=103;MQ=0.0;MQ0=103\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227811\t.\tG\t.\t.\tmq_score:50\tDP=104;MQ=0.0;MQ0=104\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227812\t.\tG\t.\t.\tmq_score:50\tDP=105;MQ=0.0;MQ0=105\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227813\t.\tA\t.\t.\tmq_score:50\tDP=105;MQ=0.0;MQ0=105\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227814\t.\tG\t.\t.\tmq_score:50\tDP=104;MQ=0.0;MQ0=104\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227815\t.\tG\t.\t.\tmq_score:50\tDP=106;MQ=0.0;MQ0=106\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227816\t.\tA\t.\t.\tmq_score:50\tDP=102;MQ=0.0;MQ0=102\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227817\t.\tA\t.\t.\tmq_score:50\tDP=102;MQ=0.0;MQ0=102\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227818\t.\tT\t.\t.\tmq_score:50\tDP=103;MQ=0.0;MQ0=103\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227819\t.\tA\t.\t.\tmq_score:50\tDP=103;MQ=0.0;MQ0=103\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227820\t.\tC\t.\t.\tmq_score:50\tDP=104;MQ=0.0;MQ0=104\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227821\t.\tC\t.\t.\tmq_score:50\tDP=97;MQ=0.0;MQ0=97\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227822\t.\tG\t.\t.\tmq_score:50\tDP=99;MQ=0.0;MQ0=99\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227823\t.\tG\t.\t.\tmq_score:50\tDP=97;MQ=0.0;MQ0=97\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227824\t.\tT\t.\t.\tmq_score:50\tDP=95;MQ=0.0;MQ0=95\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227825\t.\tG\t.\t.\tmq_score:50\tDP=95;MQ=0.0;MQ0=95\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227826\t.\tG\t.\t.\tmq_score:50\tDP=95;MQ=0.0;MQ0=95\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227827\t.\tC\t.\t.\tmq_score:50\tDP=94;MQ=0.0;MQ0=94\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227828\t.\tG\t.\t.\tmq_score:50\tDP=94;MQ=0.0;MQ0=94\tGT\t./.\n' |
b |
diff -r 000000000000 -r 834a312c0114 test-data/test_output.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_output.vcf Thu Dec 10 09:22:39 2015 -0500 |
[ |
b'@@ -0,0 +1,1001 @@\n+##fileformat=VCFv4.1\n+##GATKCommandLine=<ID=SelectVariants,Version=2.6-5-gba531bd,Date="Thu May 22 09:24:36 BST 2014",Epoch=1400747076888,CommandLineOptions="analysis_type=SelectVariants input_file=[] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/phengs/galaxy/database/tmp/tmp-gatk-lMh4vi/gatk_input.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=2 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false variant=(RodBinding name=variant source=/phengs/galaxy/database/tmp/tmp-gatk-lMh4vi/input_variant.vcf) discordance=(RodBinding name= source=UNBOUND) concordance=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sample_name=[] sample_expressions=null sample_file=null exclude_sample_name=[] exclude_sample_file=[] select_expressions=[] excludeNonVariants=false excludeFiltered=false restrictAllelesTo=ALL keepOriginalAC=false mendelianViolation=false mendelianViolationQualThreshold=0.0 select_random_fraction=0.0 remove_fraction_genotypes=0.0 selectTypeToInclude=[SNP] keepIDs=null fullyDecode=false forceGenotypesDecode=false justRead=false maxIndelSize=2147483647 ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES=false filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n+##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.6-5-gba531bd,Date="Thu May 22 09:10:24 BST 2014",Epoch=1400746224902,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/phengs/galaxy/database/tmp/tmp-gatk-x7S0pF/gatk_input_0.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/phengs/galaxy/database/tmp/tmp-gatk-x7S0pF/gatk_input.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=NONE downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=STRICT remove_program_records=false keep_program_records=false unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=2 num_cpu_threads_per_data_thread=6 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree='..b'6\t.\tG\t.\t.\tmq_score:50;min_depth:100\tDP=90;MQ=0.0;MQ0=90\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227787\t.\tT\t.\t.\tmq_score:50;min_depth:100\tDP=87;MQ=0.0;MQ0=87\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227788\t.\tA\t.\t.\tmq_score:50;min_depth:100\tDP=87;MQ=0.0;MQ0=87\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227789\t.\tG\t.\t.\tmq_score:50;min_depth:100\tDP=88;MQ=0.0;MQ0=88\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227790\t.\tC\t.\t.\tmq_score:50;min_depth:100\tDP=87;MQ=0.0;MQ0=87\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227791\t.\tG\t.\t.\tmq_score:50;min_depth:100\tDP=87;MQ=0.0;MQ0=87\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227792\t.\tG\t.\t.\tmq_score:50;min_depth:100\tDP=87;MQ=0.0;MQ0=87\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227793\t.\tT\t.\t.\tmq_score:50;min_depth:100\tDP=84;MQ=0.0;MQ0=84\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227794\t.\tG\t.\t.\tmq_score:50;min_depth:100\tDP=84;MQ=0.0;MQ0=84\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227795\t.\tA\t.\t.\tmq_score:50;min_depth:100\tDP=89;MQ=0.0;MQ0=89\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227796\t.\tA\t.\t.\tmq_score:50;min_depth:100\tDP=89;MQ=0.0;MQ0=89\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227797\t.\tA\t.\t.\tmq_score:50;min_depth:100\tDP=92;MQ=0.0;MQ0=92\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227798\t.\tT\t.\t.\tmq_score:50;min_depth:100\tDP=89;MQ=0.0;MQ0=89\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227799\t.\tG\t.\t.\tmq_score:50;min_depth:100\tDP=93;MQ=0.0;MQ0=93\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227800\t.\tC\t.\t.\tmq_score:50;min_depth:100\tDP=93;MQ=0.0;MQ0=93\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227801\t.\tG\t.\t.\tmq_score:50;min_depth:100\tDP=92;MQ=0.0;MQ0=92\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227802\t.\tT\t.\t.\tmq_score:50;min_depth:100\tDP=91;MQ=0.0;MQ0=91\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227803\t.\tA\t.\t.\tmq_score:50;min_depth:100\tDP=92;MQ=0.0;MQ0=92\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227804\t.\tG\t.\t.\tmq_score:50;min_depth:100\tDP=94;MQ=0.0;MQ0=94\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227805\t.\tA\t.\t.\tmq_score:50;min_depth:100\tDP=96;MQ=0.0;MQ0=96\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227806\t.\tG\t.\t.\tmq_score:50\tDP=104;MQ=0.0;MQ0=104\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227807\t.\tA\t.\t.\tmq_score:50\tDP=109;MQ=0.0;MQ0=109\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227808\t.\tT\t.\t.\tmq_score:50\tDP=104;MQ=0.0;MQ0=104\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227809\t.\tC\t.\t.\tmq_score:50\tDP=104;MQ=0.0;MQ0=104\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227810\t.\tT\t.\t.\tmq_score:50\tDP=103;MQ=0.0;MQ0=103\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227811\t.\tG\t.\t.\tmq_score:50\tDP=104;MQ=0.0;MQ0=104\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227812\t.\tG\t.\t.\tmq_score:50\tDP=105;MQ=0.0;MQ0=105\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227813\t.\tA\t.\t.\tmq_score:50\tDP=105;MQ=0.0;MQ0=105\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227814\t.\tG\t.\t.\tmq_score:50\tDP=104;MQ=0.0;MQ0=104\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227815\t.\tG\t.\t.\tmq_score:50\tDP=106;MQ=0.0;MQ0=106\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227816\t.\tA\t.\t.\tmq_score:50\tDP=102;MQ=0.0;MQ0=102\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227817\t.\tA\t.\t.\tmq_score:50\tDP=102;MQ=0.0;MQ0=102\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227818\t.\tT\t.\t.\tmq_score:50\tDP=103;MQ=0.0;MQ0=103\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227819\t.\tA\t.\t.\tmq_score:50\tDP=103;MQ=0.0;MQ0=103\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227820\t.\tC\t.\t.\tmq_score:50\tDP=104;MQ=0.0;MQ0=104\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227821\t.\tC\t.\t.\tmq_score:50;min_depth:100\tDP=97;MQ=0.0;MQ0=97\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227822\t.\tG\t.\t.\tmq_score:50;min_depth:100\tDP=99;MQ=0.0;MQ0=99\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227823\t.\tG\t.\t.\tmq_score:50;min_depth:100\tDP=97;MQ=0.0;MQ0=97\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227824\t.\tT\t.\t.\tmq_score:50;min_depth:100\tDP=95;MQ=0.0;MQ0=95\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227825\t.\tG\t.\t.\tmq_score:50;min_depth:100\tDP=95;MQ=0.0;MQ0=95\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227826\t.\tG\t.\t.\tmq_score:50;min_depth:100\tDP=95;MQ=0.0;MQ0=95\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227827\t.\tC\t.\t.\tmq_score:50;min_depth:100\tDP=94;MQ=0.0;MQ0=94\tGT\t./.\n+gi|15829254|ref|NC_002695.1|\t227828\t.\tG\t.\t.\tmq_score:50;min_depth:100\tDP=94;MQ=0.0;MQ0=94\tGT\t./.\n' |
b |
diff -r 000000000000 -r 834a312c0114 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Dec 10 09:22:39 2015 -0500 |
b |
@@ -0,0 +1,12 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="python" version="2.7.10"> + <repository changeset_revision="0339c4a9b87b" name="package_python_2_7_10" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="pyyaml" version="3.11"> + <repository changeset_revision="99267d131c05" name="package_python_2_7_pyyaml_3_11" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="pyvcf" version="0.6.7"> + <repository changeset_revision="c05e29a21f10" name="package_pyvcf_0_6_7" owner="saket-choudhary" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency> |