Mercurial > repos > gandres > vcftools_filter_stats_diversity
changeset 10:2b7eb79f0ba0 draft
planemo upload
author | gandres |
---|---|
date | Wed, 13 Apr 2016 06:49:39 -0400 |
parents | ce984119f669 |
children | f494c8d22725 |
files | VCFToolFilter/GetChromOfVCF.pl VCFToolFilter/find_indiv.py |
diffstat | 2 files changed, 34 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/perl

# VCFToolFilter/GetChromOfVCF.pl - count the records of a VCF file per
# chromosome.
#
# Usage: GetChromOfVCF.pl <file.vcf>
#
# Every line up to and including the "#CHROM" column-header line is skipped.
# For each following non-empty line the first tab-separated field is taken as
# the chromosome name. One "<chromosome><TAB><count>" line per chromosome is
# printed to STDOUT, sorted by chromosome name in string order.

use strict;
use warnings;

my $vcf = $ARGV[0];
die "Usage: $0 <file.vcf>\n" unless defined $vcf;

# Three-argument open: the file name can never be interpreted as an open
# mode, and a missing or unreadable file is reported instead of silently
# producing empty output.
open(my $V, '<', $vcf) or die "Cannot open '$vcf': $!\n";

my %chrs;      # chromosome name => number of records seen
my $ok = 0;    # true once the "#CHROM" header line has been consumed

while (my $line = <$V>) {
    if (!$ok) {
        # Still in the meta-information header; the "#CHROM" line itself is
        # not a record, so it only flips the flag.
        $ok = 1 if $line =~ /^#CHROM/;
        next;
    }

    # Strip the line ending (LF or CRLF) so a line without any tab does not
    # yield a chromosome name that carries a trailing newline.
    $line =~ s/\r?\n\z//;
    next if $line eq '';

    my ($chr) = split(/\t/, $line, 2);
    next unless defined $chr && length $chr;
    $chrs{$chr}++;
}
close($V);

# NOTE(review): the separator is emitted as a tab because the visible caller
# (find_indiv.py) pipes this output through `cut -f1`, which splits on tabs.
# Confirm that no other consumer expects a single space here.
foreach my $chr (sort keys %chrs) {
    print "$chr\t$chrs{$chr}\n";
}
def _contig_id(line):
    """Return the ID value of a ``##contig=<...>`` header line, or None.

    The text between ``<`` and ``>`` is split on commas and the first
    ``ID=value`` pair is returned, so IDs containing dots, dashes or other
    non-word characters are accepted, and a ``length=`` field is not required.
    """
    body = line.strip()[len("##contig=<"):]
    if body.endswith(">"):
        body = body[:-1]
    for field in body.split(","):
        key, sep, value = field.partition("=")
        if sep and key.strip() == "ID" and value.strip():
            return value.strip()
    return None


def get_field_chrs_options(dataset):
    """Build the chromosome options for the VCF file behind ``dataset``.

    Parameters:
        dataset: object whose ``file_name`` attribute is the path of a VCF
            file.

    Returns:
        A list of ``(name, name, True)`` tuples. When the header declares
        ``##contig`` lines, their IDs are returned in header order. Otherwise
        the distinct values of the first tab-separated column of the records
        are returned in sorted order. An empty list is returned when neither
        source yields a name.

    The file is parsed directly instead of being handed to ``grep``/``perl``
    through a shell: the path is never interpolated into a command line, and
    the result no longer depends on the working directory containing the
    helper script.
    """
    contigs = []
    record_names = set()

    with open(dataset.file_name) as handle:
        for line in handle:
            if line.startswith("##"):
                # Meta-information line; only contig declarations matter.
                if line.startswith("##contig=<"):
                    contig = _contig_id(line)
                    if contig:
                        contigs.append(contig)
                continue
            if contigs:
                # The header is over and it declared contigs: the records do
                # not need to be scanned.
                break
            if line.startswith("#"):
                # "#CHROM ..." column-header line, not a record.
                continue
            name = line.rstrip("\r\n").split("\t", 1)[0]
            if name:
                record_names.add(name)

    names = contigs if contigs else sorted(record_names)
    return [(name, name, True) for name in names]