Mercurial > repos > jjohnson > pileup_to_vcf
changeset 9:c0a6e8f595ec default tip
Add option to set the VCF ID field value; this can be used to ID germline variants for SnpSift
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Thu, 11 Apr 2013 10:28:10 -0500 |
parents | 07cd87e94fbe |
children | |
files | README pileup_to_vcf.py pileup_to_vcf.xml |
diffstat | 3 files changed, 13 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/README Thu Mar 28 14:55:50 2013 -0500 +++ b/README Thu Apr 11 10:28:10 2013 -0500 @@ -2,4 +2,4 @@ Filters on read coverage, base quality and the frequency of a variant. The VCF info is populated with a SAF tag that give the specific frequency (0 - 1) of a variant being observed. This is used for filtering in the mmuff ( Missense Mutation and Frameshift Finder) workflow. - +The VCF ID field can be optionally set, which can be used with SnpSift annotate or SnpSift filter.
--- a/pileup_to_vcf.py Thu Mar 28 14:55:50 2013 -0500 +++ b/pileup_to_vcf.py Thu Apr 11 10:28:10 2013 -0500 @@ -34,7 +34,7 @@ vcf_header = """\ ##fileformat=VCFv4.0 -##source=pileup_to_vcf.pyV1.1 +##source=pileup_to_vcf.pyV1.2 ##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\"> ##INFO=<ID=SAF,Number=.,Type=Float,Description=\"Specific Allele Frequency\"> ##FILTER=<ID=DP,Description=\"Minimum depth of %s\"> @@ -55,6 +55,8 @@ parser.add_option( '-f', '--min_allele_freq', type='float', default='.5', dest='min_allele_freq', help='The minimum frequency of an allele for it to be reported (default .5)' ) parser.add_option( '-m', '--allow_multiples', action="store_true", dest='allow_multiples', default=False, help='Allow multiple alleles to be reported' ) parser.add_option( '-s', '--snps_only', action="store_true", dest='snps_only', default=False, help='Only report SNPs, not indels' ) + # ID to use + parser.add_option( '-I', '--id', dest='id', default=None, help='The value for the VCF ID field' ) # select columns parser.add_option( '-C', '--chrom_col', type='int', default='1', dest='chrom_col', help='The ordinal position (starting with 1) of the chromosome column' ) parser.add_option( '-P', '--pos_col', type='int', default='2', dest='pos_col', help='The ordinal position (starting with 1) of the position column' ) @@ -117,6 +119,8 @@ else: outputFile = sys.stdout + vcf_id = options.id if options.id else "." + indel_len_pattern = '([1-9][0-9]*)' ref_skip_pattern = '[<>]' @@ -251,7 +255,6 @@ alts.append(vcf_ref[:len(vcf_ref) - len(k)]) # TODO alt will be a substring of vcf_ref, test this safs.append(saf) if len(alts) > 0: - vcf_id = "." vcf_qual = "." vcf_filter = "PASS" # if not allow_multiples, report only the most freq alt
--- a/pileup_to_vcf.xml Thu Mar 28 14:55:50 2013 -0500 +++ b/pileup_to_vcf.xml Thu Apr 11 10:28:10 2013 -0500 @@ -1,4 +1,4 @@ -<tool id="pileup_to_vcf" name="Pileup to VCF" version="2.1"> +<tool id="pileup_to_vcf" name="Pileup to VCF" version="2.2"> <description>Converts a pileup to VCF with filtering</description> <command interpreter="python">pileup_to_vcf.py -i $input_file -o $output_file #if $min_cvrg.__str__ != '': @@ -15,6 +15,9 @@ #end if $allow_multiples $snps_only + #if $vcf_id.__str__ != '': + --id $vcf_id + #end if #if $cols.select_order == 'yes' : #if $chrom_col.__str__ != '': --chrom_col $chrom_col @@ -65,6 +68,9 @@ <option value="qual">Reads at this position taht pass the base call quality threshold</option> <option value="all">All reads and indels</option> </param> + <param name="vcf_id" type="text" optional="true" value="" label="The VCF ID" help="The VCF output will use this as the ID field value"> + <validator type="regex" message="whitespace characters not allowed">^\S*$</validator> + </param> </inputs> <outputs> <data format="vcf" metadata_source="input_file" name="output_file" />