Previous changeset 1:0f67ed444d47 (2015-07-02) Next changeset 3:612066e3f57d (2015-11-09) |
Commit message:
Uploaded |
added:
VCFToolFilter/VCFToolsFilter.pl VCFToolFilter/test-data/result.log VCFToolFilter/test-data/result.vcf VCFToolFilter/test-data/sample.vcf VCFToolFilter/vcfToolsFilter.sh VCFToolFilter/vcfToolsFilter.xml VCFToolsSlidingWindow/VCFToolsSlidingWindow.pl VCFToolsSlidingWindow/vcfToolsSlidingWindow.xml VCFToolsStats/.svn/entries VCFToolsStats/.svn/text-base/VCFToolsStats.pl.svn-base VCFToolsStats/.svn/text-base/vcfToolsStats.sh.svn-base VCFToolsStats/.svn/text-base/vcfToolsStats.xml.svn-base VCFToolsStats/VCFToolsStats.pl VCFToolsStats/test-data/.svn/entries VCFToolsStats/test-data/.svn/text-base/result.TsTv.summary.svn-base VCFToolsStats/test-data/.svn/text-base/result.annotation.svn-base VCFToolsStats/test-data/.svn/text-base/result.het.svn-base VCFToolsStats/test-data/.svn/text-base/result.imiss.svn-base VCFToolsStats/test-data/.svn/text-base/result.log.svn-base VCFToolsStats/test-data/.svn/text-base/sample.vcf.svn-base VCFToolsStats/test-data/result.TsTv.summary VCFToolsStats/test-data/result.annotation VCFToolsStats/test-data/result.het VCFToolsStats/test-data/result.imiss VCFToolsStats/test-data/result.log VCFToolsStats/test-data/sample.vcf VCFToolsStats/vcfToolsStats.sh VCFToolsStats/vcfToolsStats.xml tool_dependencies.xml |
removed:
vcftools_main/VCFToolFilter/VCFToolsFilter.pl vcftools_main/VCFToolFilter/test-data/result.log vcftools_main/VCFToolFilter/test-data/result.vcf vcftools_main/VCFToolFilter/test-data/sample.vcf vcftools_main/VCFToolFilter/vcfToolsFilter.sh vcftools_main/VCFToolFilter/vcfToolsFilter.xml vcftools_main/VCFToolsSlidingWindow/VCFToolsSlidingWindow.pl vcftools_main/VCFToolsSlidingWindow/vcfToolsSlidingWindow.xml vcftools_main/VCFToolsStats/VCFToolsStats.pl vcftools_main/VCFToolsStats/test-data/result.TsTv.summary vcftools_main/VCFToolsStats/test-data/result.annotation vcftools_main/VCFToolsStats/test-data/result.het vcftools_main/VCFToolsStats/test-data/result.imiss vcftools_main/VCFToolsStats/test-data/result.log vcftools_main/VCFToolsStats/test-data/sample.vcf vcftools_main/VCFToolsStats/vcfToolsStats.sh vcftools_main/VCFToolsStats/vcfToolsStats.xml vcftools_main/tool_dependencies.xml |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolFilter/VCFToolsFilter.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolFilter/VCFToolsFilter.pl Fri Jul 10 04:16:17 2015 -0400 |
[ |
#!/usr/bin/perl

# VCFToolsFilter.pl - translate a handful of high-level filtering options into
# one `vcftools` invocation.
#
# Everything vcftools prints (stdout and stderr) is appended to ./vcftools.log.
# Exit status: 0 when vcftools succeeded, 1 when it failed; usage and option
# errors terminate through die.
#
# NOTE(review): Switch and Bio::SeqIO do not appear to be used anywhere below;
# they are kept only so that this script's dependency set is unchanged.

use strict;
use warnings;
use Switch;
use Getopt::Long;
use Bio::SeqIO;

# Unbuffered STDOUT so nothing printed here can be duplicated by the forked
# child that runs vcftools.
$| = 1;

my $usage = qq~Usage:$0 <args> [<opts>]

where <args> are:

    -i, --input          <VCF input>
    -o, --out            <Output basename>

      <opts> are:

    -s, --samples        <Samples to be analyzed. Comma separated list>
    -c, --chromosomes    <Chromosomes to be analyzed. Comma separated list>
    -e, --export         <Output format (VCF/freq/plink). Default: VCF>
    -f, --frequency      <Minimum MAF. Default: 0.001>
    -m, --max_freq       <Maximum MAF. Default: 0.5>
    -a, --allow_missing  <Allowed missing data proportion per site. Must be comprised between 0 and 1. Default: 1>
    -n, --nb_alleles     <Accepted number of alleles (min,max). Default: 1,4>
    -t, --type           <Type of polymorphisms to keep (ALL/SNP/INDEL). Default: ALL>
    -b, --bounds         <Lower bound and upper bound for a range of sites to be processed (start,end). Only applied when exactly one chromosome is given with -c. Default: no restriction>
~;
$usage .= "\n";

my ($input, $out);

my $frequency_max = 0.5;
my $frequency_min = 0.001;
my $missing_data  = 1;
my $export        = "VCF";
my $type          = "ALL";
my $nb_alleles;
my $bounds;
my $samples;
my $chromosomes;

# An unknown or malformed option is fatal instead of being silently skipped.
GetOptions(
    "input=s"         => \$input,
    "out=s"           => \$out,
    "samples=s"       => \$samples,
    "chromosomes=s"   => \$chromosomes,
    "frequency=s"     => \$frequency_min,
    "max_freq=s"      => \$frequency_max,
    "allow_missing=s" => \$missing_data,
    "export=s"        => \$export,
    "type=s"          => \$type,
    "nb_alleles=s"    => \$nb_alleles,
    "bounds=s"        => \$bounds,
) or die $usage;

die $usage
    if (!$input || !$out);


# Split a "first,second" option value into two non-negative integers.
# Dies with a message naming the option when the value has another shape, so
# an undefined second element can never reach the vcftools command line.
sub parse_int_pair
{
    my ($option_name, $value) = @_;

    my @parts = split(",", $value);
    if (@parts != 2 || grep { $_ !~ /^\s*\d+\s*$/ } @parts)
    {
        die "Error: --$option_name expects two integers separated by a comma (got '$value')\n";
    }
    s/\s+//g for @parts;
    return @parts;
}

# Run a command without going through a shell, appending its stdout and
# stderr to $log_file. Returns the wait status ($?) of the command.
sub run_logged
{
    my ($log_file, @command) = @_;

    my $pid = fork();
    die "Cannot fork to run $command[0]: $!\n" unless defined $pid;

    if ($pid == 0)
    {
        # Child: redirect both output streams to the log, then replace
        # ourselves with the command.
        open(STDOUT, '>>', $log_file) or die "Cannot append to $log_file: $!\n";
        open(STDERR, '>&', \*STDOUT)  or die "Cannot redirect STDERR: $!\n";
        exec { $command[0] } @command
            or print STDERR "Cannot execute $command[0]: $!\n";
        exit 127;
    }

    waitpid($pid, 0);
    return $?;
}


my @dnasamples       = $samples     ? split(",", $samples)     : ();
my @chromosomes_list = $chromosomes ? split(",", $chromosomes) : ();
my @nalleles         = $nb_alleles  ? parse_int_pair("nb_alleles", $nb_alleles) : ();
my @boundaries       = $bounds      ? parse_int_pair("bounds", $bounds)         : ();

# One --indv / --chr pair per requested sample / chromosome.
my @indiv_args = map { ("--indv", $_) } @dnasamples;
my @chrom_args = map { ("--chr",  $_) } @chromosomes_list;

# Any export value other than "freq" or "plink" produces a recoded VCF.
my @export_args = ("--recode");
if ($export eq "freq")
{
    @export_args = ("--freq");
}
elsif ($export eq "plink")
{
    @export_args = ("--plink");
}

my @allele_args = ("--min-alleles", 1, "--max-alleles", 4);
if (@nalleles)
{
    @allele_args = ("--min-alleles", $nalleles[0], "--max-alleles", $nalleles[1]);
}

# The position range used to be computed and then dropped from the command.
# vcftools documents --from-bp/--to-bp as usable only together with a single
# --chr, so the range is forwarded in that situation only.
my @bounds_args;
if (@boundaries)
{
    if (@chromosomes_list == 1)
    {
        @bounds_args = ("--from-bp", $boundaries[0], "--to-bp", $boundaries[1]);
    }
    else
    {
        print "Note: --bounds is ignored unless exactly one chromosome is selected with --chromosomes\n";
    }
}

# "ALL" (or any other value) adds no type filter.
my @type_args;
if ($type eq "INDEL")
{
    @type_args = ("--keep-only-indels");
}
elsif ($type eq "SNP")
{
    @type_args = ("--remove-indels");
}


my @command = (
    "vcftools",
    "--vcf", $input,
    "--out", $out,
    "--keep-INFO-all",
    "--remove-filtered-all",
    @type_args,
    @export_args,
    @chrom_args,
    @indiv_args,
    @allele_args,
    @bounds_args,
    "--maf",         $frequency_min,
    "--max-maf",     $frequency_max,
    "--max-missing", $missing_data,
);

my $status = run_logged("vcftools.log", @command);
if ($status != 0)
{
    warn "Error: vcftools failed (wait status $status); see vcftools.log\n";
    exit 1;
}
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolFilter/test-data/result.log --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolFilter/test-data/result.log Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,21 @@ + +VCFtools - v0.1.12b +(C) Adam Auton and Anthony Marcketta 2009 + +Parameters as interpreted: + --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat + --chr chr1 + --recode-INFO-all + --maf 0.001 + --max-alleles 4 + --max-maf 0.5 + --min-alleles 2 + --max-missing 1 + --out filtered + --recode + --remove-filtered-all + +After filtering, kept 1 out of 1 Individuals +Outputting VCF file... +After filtering, kept 3616 out of a possible 4955 Sites +Run Time = 0.00 seconds |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolFilter/test-data/result.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolFilter/test-data/result.vcf Fri Jul 10 04:16:17 2015 -0400 |
[ |
b'@@ -0,0 +1,3661 @@\n+##fileformat=VCFv4.1\n+##FILTER=<ID=LowQual,Description="Low quality">\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n+##INFO=<ID=AF,Number=A,Type'..b'0012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:30,25:55:99:802,0,993\n+chr1\t188173\t.\tG\tA\t697.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.617;DP=42;Dels=0.00;FS=2.786;HaplotypeScore=1.9991;MLEAC=1;MLEAF=0.500;MQ=59.09;MQ0=0;MQRankSum=-0.013;QD=16.61;ReadPosRankSum=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n+chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n+chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n+chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n+chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n+chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n+chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n+chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n+chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n+chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n' |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolFilter/test-data/sample.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolFilter/test-data/sample.vcf Fri Jul 10 04:16:17 2015 -0400 |
[ |
b'@@ -0,0 +1,5000 @@\n+##fileformat=VCFv4.1\n+##FILTER=<ID=LowQual,Description="Low quality">\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n+##INFO=<ID=AF,Number=A,Type'..b'm=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n+chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n+chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n+chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n+chr1\t188357\t.\tC\tT\t1327.77\t.\tAC=2;AF=1.00;AN=2;DP=36;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=24.46;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t1/1:0,36:36:99:1356,105,0\n+chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n+chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n+chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n+chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n+chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n+chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n' |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolFilter/vcfToolsFilter.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolFilter/vcfToolsFilter.sh Fri Jul 10 04:16:17 2015 -0400 |
[ |
#!/bin/bash
#
# Wrapper around VCFToolsFilter.pl: forwards the positional arguments as named
# options, then copies the files produced under the output basename to their
# final destinations.
#
# Positional arguments:
#   $1   input VCF
#   $2   output basename handed to VCFToolsFilter.pl (--out)
#   $3   destination of the main output (.recode.vcf, .frq or .ped, see $5)
#   $4   destination of the vcftools log
#   $5   export format: VCF, freq or plink
#   $6   minimum MAF
#   $7   maximum MAF
#   $8   allowed missing-data proportion
#   $9   minimum number of alleles
#   $10  maximum number of alleles
#   $11  polymorphism type
#   $12  lower position bound
#   $13  upper position bound
#   $14  comma separated sample list, or "None"
#   $15  comma separated chromosome list, or "None"
#   $16  destination of the plink .map file (only used when $5 is "plink")

tool_path=$(dirname "$0")

filein=$1
fileout_label=$2
fileout=$3
filelog=$4
export=$5
frequency=$6
max_freq=$7
allow_missing=$8
nb_alleles_min=$9
nb_alleles_max=${10}
type=${11}
bound_start=${12}
bound_end=${13}
fileout_map=${16}

# Optional arguments are collected in an array so that each value stays one
# word. An empty value is treated like "None": otherwise the option would be
# emitted without a value and swallow the next argument.
optional_args=()

if [ -n "${14}" ] && [ "${14}" != "None" ]
then optional_args+=(--samples "${14}")
fi

if [ -n "${15}" ] && [ "${15}" != "None" ]
then optional_args+=(--chromosomes "${15}")
fi

# Swap reversed ranges instead of failing.
if [ "$bound_start" -gt "$bound_end" ]
then tmp=$bound_start ; bound_start=$bound_end ; bound_end=$tmp ; echo "Warning : Lower bound must be lower than greater bound!" >&2
fi

if [ "$nb_alleles_min" -gt "$nb_alleles_max" ]
then tmp=$nb_alleles_min ; nb_alleles_min=$nb_alleles_max ; nb_alleles_max=$tmp ; echo "Warning : Minimum number of alleles must be lower than maximum number of allele!" >&2
fi

perl "$tool_path/VCFToolsFilter.pl" --input "$filein" --out "$fileout_label" --export "$export" --frequency "$frequency" --max_freq "$max_freq" --allow_missing "$allow_missing" --nb_alleles "$nb_alleles_min,$nb_alleles_max" --type "$type" --bounds "$bound_start,$bound_end" "${optional_args[@]}"
status=$?

# Hand the log over first so it is available even when filtering failed.
if [ -f vcftools.log ]
then cp vcftools.log "$filelog" && rm vcftools.log
fi

if [ "$status" -ne 0 ]
then echo "Error : VCFToolsFilter.pl exited with status $status" >&2 ; exit "$status"
fi

# The intermediate file is removed only once it has been copied successfully.
if [ "$export" = "VCF" ]
then cp "$fileout_label.recode.vcf" "$fileout" && rm "$fileout_label.recode.vcf"
elif [ "$export" = "freq" ]
then cp "$fileout_label.frq" "$fileout" && rm "$fileout_label.frq"
else cp "$fileout_label.ped" "$fileout" && cp "$fileout_label.map" "$fileout_map" && rm "$fileout_label.ped" "$fileout_label.map"
fi
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolFilter/vcfToolsFilter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolFilter/vcfToolsFilter.xml Fri Jul 10 04:16:17 2015 -0400 |
[ |
b'@@ -0,0 +1,298 @@\n+<tool id="sniplay_vcftoolsfilter" name="VCFtools Filter" version="1.1.1">\n+ \n+ <!-- [REQUIRED] Tool description displayed after the tool name -->\n+ <description> </description>\n+ \n+ <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->\n+ <requirements>\n+ <requirement type="binary">perl</requirement>\n+\t<requirement type="package" version="0.1.12b">vcftools</requirement>\n+ </requirements>\n+ \n+ <!-- [OPTIONAL] Command to be executed to get the tool\'s version string -->\n+ <version_command>\n+<!--\n+ tool_binary -v\n+-->\n+ </version_command>\n+ \n+ <!-- [REQUIRED] The command to execute -->\n+ <command interpreter="perl">\n+\tvcfToolsFilter.sh $filein $fileout_label $fileout $filelog $export $frequency $max_freq $allow_missing $nb_alleles_min $nb_alleles_max $type_p $bound_start $bound_end\n+\t#if str( $samples ) == "":\n+\t\'None\'\n+\t#else\n+\t$samples\n+\t#end if\n+\t#if str( $chromosomes ) == "":\n+\t\'None\'\n+\t#else\n+\t$chromosomes\n+\t#end if\n+\t#if str( $export ) == "plink":\n+\t$fileout_map\n+\t#else\n+\t\'\'\n+\t#end if\n+ </command>\n+ \n+ <!-- [REQUIRED] Input files and tool parameters -->\n+ <inputs>\n+\t<param name="filein" type="data" format="vcf" optional="false" label="VCF input" />\n+\t<param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename"/>\n+\t<param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">\n+\t\t<validator type="regex" message="Please enter a comma separated list.">^\\w+(,\\w+)*$</validator>\n+\t</param>\n+\t<param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. 
Comma separated list">\n+\t <validator type="regex" message="Please enter a comma separated list.">^\\w+(,\\w+)*$</validator>\n+ </param>\n+\t<param name="export" type="select" label="Output format" >\n+\t <option value="VCF" selected="true">VCF</option>\n+\t <option value="freq">freq</option>\n+ <option value="plink">plink</option>\n+ </param>\n+\t<param name="frequency" type="float" value="0.001" label="Minimum MAF." help="Minimum frequency." />\n+\t<param name="max_freq" type="float" value="0.5" label="Maximum MAF." help="Maximum frequency." />\n+\t<param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />\n+\t<param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />\n+\t<param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />\n+ <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep." >\n+ <option value="ALL" selected="true">All</option>\n+ <option value="SNP">SNP</option>\n+ <option value="INDEL">Indel</option>\n+ </param>\n+\t<param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />\n+\t<param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." 
/>\n+ </inputs>\n+ \n+ <!-- [REQUIRED] Output files -->\n+ <outputs>\n+\t<data name="fileout" format="vcf" label="${fileout_label}.#if str($export)==\'plink\' then \'ped\' else \'\' # #if str($export)==\'freq\' then \'frq\' else \'\' # #if str($export)==\'VCF\' then \'vcf\' else \'\' #" >\n+\t\t<change_format>\n+ \t<when input="export" value="freq" format="tabular" />\n+\t\t\t<when input="export" value="plink" format="txt" />\n+\t\t</change_format>\t\n+\t</data>\n+\t<data name="fileout_map" format="txt" label="${fileout_label}.map">\n+\t\t<filter>(export == \'plink\')</filter>\n+\t</data>\n+\t<data name="filelog" format="txt" label="${fileout_label}.log" />\n'..b'+\tChromosomes to be analyzed. Comma separated list\n+\n+Output format\n+\tVCF/freq/plink\n+\n+Minimum MAF\n+\tMinimum frequency\n+\n+Maximum MAF\n+\tMaximum frequency\n+\n+Missing data proportion\n+\tAllowed missing data proportion per site. Must be comprised between 0 and 1.\n+\n+Number of alleles\n+\tAccepted number of alleles min and max.\n+\n+Polymorphisms\n+\tType of polymorphisms to keep (ALL/SNP/INDEL).\n+Bounds\n+\tLower bound and upper bound for a range of sites to be processed.\n+\n+------------\n+Output files\n+------------\n+\n+VCF file\n+\tVCF file filtered \n+\n+Log file\n+\n+---------------------------------------------------\n+\n+---------------\n+Working example\n+---------------\n+\n+Input files\n+===========\n+\n+VCF file\n+---------\n+\n+::\n+\n+\t#fileformat=VCFv4.1\n+\t#FILTER=<ID=LowQual,Description="Low quality">\n+\t#FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order 
listed">\n+\t[...]\n+\tCHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tCATB1\n+\tchr1\t2209\t.\tG\tT\t213.84\t.\tAC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)\tGT:AD:DP:GQ:PL\t1/1:0,7:7:18:242,18,0\n+\n+\n+Parameters\n+==========\n+\n+Output name -> filtered_chr1\n+\n+Chromosomes -> chr1\n+\n+Output format -> VCF\n+\n+Minimum MAF -> 0.001\n+\n+Maximum MAF -> 0.5\n+\n+Missing data proportion -> 1\n+\n+Number of alleles min -> 2\n+\n+Number of alleles max -> 4\n+\n+Polymorphisms -> All\n+\n+Lower bound -> 1\n+\n+Upper bound -> 100000000\n+\n+\n+Output files\n+============\n+\n+filtered_genelist_intron.vcf\n+----------------------------\n+\n+::\n+\n+ #fileformat=VCFv4.1\n+ #FILTER=<ID=LowQual,Description="Low quality">\n+ #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+ [...]\n+ CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1\n+\tchr1\t5059\t.\tC\tG\t146.84\t.\tAC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)\tGT:AD:DP:GQ:PL\t1/1:0,8:8:18:175,18,0\n+\n+\n+ </help>\n+ <citations>\n+ <!-- [HELP] As DOI or BibTex entry -->\n+ <citation type="bibtex">\n+ @article{Danecek01082011,\n+ author = {Danecek, Petr and Auton, Adam and Abecasis, Goncalo and Albers, Cornelis A. and Banks, Eric and DePristo, Mark A. and Handsaker, Robert E. and Lunter, Gerton and Marth, Gabor T. and Sherry, Stephen T. 
and McVean, Gilean and Durbin, Richard and 1000 Genomes Project Analysis Group},\n+ title = {The variant call format and VCFtools},\n+ volume = {27},\n+ number = {15},\n+ pages = {2156-2158},\n+ year = {2011},\n+ doi = {10.1093/bioinformatics/btr330},\n+ abstract ={Summary: The variant call format (VCF) is a generic format for storing DNA polymorphism data such as SNPs, insertions, deletions and structural variants, together with rich annotations. VCF is usually stored in a compressed manner and can be indexed for fast data retrieval of variants from a range of positions on the reference genome. The format was developed for the 1000 Genomes Project, and has also been adopted by other projects such as UK10K, dbSNP and the NHLBI Exome Project. VCFtools is a software suite that implements various utilities for processing VCF files, including validation, merging, comparing and also provides a general Perl API.Availability: http://vcftools.sourceforge.netContact: rd@sanger.ac.uk},\n+ URL = {http://bioinformatics.oxfordjournals.org/content/27/15/2156.abstract},\n+ eprint = {http://bioinformatics.oxfordjournals.org/content/27/15/2156.full.pdf+html},\n+ journal = {Bioinformatics}\n+ }\n+ </citation>\n+\n+ </citations>\n+\n+</tool>\n' |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsSlidingWindow/VCFToolsSlidingWindow.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsSlidingWindow/VCFToolsSlidingWindow.pl Fri Jul 10 04:16:17 2015 -0400 |
[ |
#!/usr/bin/perl

# VCFToolsSlidingWindow.pl
#
# Purpose: compute sliding-window statistics on a VCF file with VCFtools.
#   1. Sort the input with vcf-sort into "<input>.sorted".
#   2. Run vcftools four times on the sorted file (--window-pi, --TajimaD,
#      --TsTv, --SNPdensity). Every run writes under the --out basename and
#      appends its console output to "<out>.vcftools.log".
#   3. When a group file is given (one "individual;population" pair per
#      line), repeat the four runs per population, restricted with --indv,
#      --maf 0.001 and --remove-filtered-all, then paste the per-population
#      PI / TajimaD columns side by side into "<out>.combined.pi(.txt)" and
#      "<out>.combined.dtajima(.txt)".
#
# Exit status: 0 when every external command succeeded, 1 otherwise. A
# failing vcf-sort aborts immediately because nothing else can run without
# the sorted file; other failures are reported on STDERR and the remaining
# steps are still attempted.

use strict;
use warnings;
use Getopt::Long;

my $usage = qq~Usage:$0 <args> [<opts>]

where <args> are:

    -i, --input          <VCF input>
    -o, --out            <output basename>
    -s, --step           <step size>
    -w, --window         <window size>

<opts> are:
    -g, --group          <group file>
~;
$usage .= "\n";

my ($input, $out, $groupfile);
my $window = 200000;
my $step   = 200000;

GetOptions(
    "input=s"  => \$input,
    "out=s"    => \$out,
    "step=s"   => \$step,
    "window=s" => \$window,
    "group=s"  => \$groupfile,
) or die $usage;

# Both the input file and the output basename are needed to build every
# file name below; an empty basename would produce hidden ".xxx" files.
die $usage
    if !defined $input || $input eq '' || !defined $out || $out eq '';

$step   = parse_size('step',   $step);
$window = parse_size('window', $window);

# population name => [ individual names ]
my %individuals_of = read_groups($groupfile);

my $VCFTOOLS_EXE = "vcftools";
my $sorted_vcf   = "$input.sorted";
my $log_file     = "$out.vcftools.log";
my $failures     = 0;

run_shell('vcf-sort ' . shell_quote($input) . ' >' . shell_quote($sorted_vcf))
    or die "Error: vcf-sort failed on $input\n";

# Runs vcftools on the sorted VCF with the given output prefix and extra
# (already shell-safe) arguments, appending stdout/stderr to the log file.
my $vcftools = sub {
    my ($prefix, @args) = @_;
    return run_shell(
        join(' ',
            $VCFTOOLS_EXE,
            '--vcf', shell_quote($sorted_vcf),
            '--out', shell_quote($prefix),
            @args,
            '>>' . shell_quote($log_file), '2>&1')
    );
};

# Whole-sample statistics.
for my $stat (
    [ '--window-pi', $window, '--window-pi-step', $step ],
    [ '--TajimaD',    $window ],
    [ '--TsTv',       $window ],
    [ '--SNPdensity', $window ],
  )
{
    $vcftools->($out, @{$stat}) or $failures++;
}

# Per-population statistics (only when a usable group file was supplied).
my @populations = sort keys %individuals_of;
if (@populations) {
    my (@pi_files, @tajima_files);

    for my $pop (@populations) {
        my $prefix  = "$out.$pop";
        my $label   = sed_escape($pop);
        my @filters = (
            '--remove-filtered-all',
            (map { ('--indv', shell_quote($_)) } @{ $individuals_of{$pop} }),
            '--maf', '0.001',
        );

        $vcftools->($prefix, '--window-pi', $window, '--window-pi-step', $step, @filters)
            or $failures++;
        # Rename the statistic column after the population. The edit is
        # limited to the header line so chromosome names are never touched.
        sed_in_place("1s/PI/$label/g", "$prefix.windowed.pi") or $failures++;
        push @pi_files, "$prefix.windowed.pi";

        $vcftools->($prefix, '--SNPdensity', $window, @filters) or $failures++;

        $vcftools->($prefix, '--TajimaD', $window, @filters) or $failures++;
        sed_in_place("1s/TajimaD/$label/g", "$prefix.Tajima.D") or $failures++;
        push @tajima_files, "$prefix.Tajima.D";

        $vcftools->($prefix, '--TsTv', $window, @filters) or $failures++;
    }

    # NOTE(review): the original script applied this nan -> 0 replacement to
    # the whole-sample "<out>.Tajima.D" (once per population), not to the
    # per-population files. That target is kept unchanged here; confirm
    # whether "<out>.<pop>.Tajima.D" was intended.
    sed_in_place('s/nan/0/g', "$out.Tajima.D") or $failures++;

    # windowed.pi has 5 columns per population, Tajima.D has 4.
    combine_columns("$out.combined.pi",      5, @pi_files)     or $failures++;
    combine_columns("$out.combined.dtajima", 4, @tajima_files) or $failures++;
}

exit($failures ? 1 : 0);


# Validates a window/step size and returns it as a plain integer string.
# Dies with "Error: <label> size must be an integer" otherwise.
sub parse_size {
    my ($label, $value) = @_;
    if (defined $value && $value =~ /^(\d+)\s*$/) {
        return $1;
    }
    die "Error: $label size must be an integer\n";
}

# Reads the optional group file ("individual;population" per line) and
# returns a hash population => array ref of individuals. A missing file is
# reported but not fatal (the original silently ignored it); lines lacking
# either field are skipped.
sub read_groups {
    my ($path) = @_;
    my %members_of;
    return %members_of if !defined $path || $path eq '';

    if (!-e $path) {
        warn "Warning: group file $path not found, skipping per-population statistics\n";
        return %members_of;
    }

    open(my $G, '<', $path) or die "Cannot open $path: $!";
    while (my $line = <$G>) {
        $line =~ s/[\r\n]//g;
        my ($individual, $population) = split(/;/, $line);
        next if !defined $individual || $individual eq '';
        next if !defined $population || $population eq '';
        push @{ $members_of{$population} }, $individual;
    }
    close($G);
    return %members_of;
}

# Wraps a string in single quotes so the shell passes it through verbatim
# (file names and sample names may contain spaces or metacharacters).
sub shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

# Escapes the characters that are special in the replacement part of a
# sed "s/.../.../" expression.
sub sed_escape {
    my ($text) = @_;
    $text =~ s{([\\/&])}{\\$1}g;
    return $text;
}

# Runs a shell command line; returns true on success, warns and returns
# false on failure.
sub run_shell {
    my ($cmd) = @_;
    return 1 if system($cmd) == 0;
    warn "Warning: command failed (status $?): $cmd\n";
    return 0;
}

# Applies a sed expression to a file in place. The list form of system()
# is used so neither the expression nor the file name goes through a shell.
sub sed_in_place {
    my ($expr, $file) = @_;
    return 1 if system('sed', '-i', $expr, $file) == 0;
    warn "Warning: sed '$expr' failed on $file (status $?)\n";
    return 0;
}

# Pastes the given per-population files side by side into $combined, then
# writes "$combined.txt" holding columns 1 and 2 plus the last column of
# every pasted file ($width columns each). For two populations this yields
# the same columns as the original hard-coded $5/$10 and $4/$8 selections,
# and it keeps working for any other number of populations. The combined
# file is overwritten, not appended to, so re-runs do not accumulate rows.
sub combine_columns {
    my ($combined, $width, @files) = @_;
    return 1 if !@files;

    my $inputs = join(' ', map { shell_quote($_) } @files);
    run_shell("paste $inputs >" . shell_quote($combined)) or return 0;

    my $fields  = join(',', '$1', '$2', map { '$' . ($width * $_) } 1 .. scalar @files);
    my $program = 'BEGIN { OFS = "\t" } { print ' . $fields . ' }';
    return run_shell(
        'awk ' . shell_quote($program) . ' ' . shell_quote($combined)
          . ' >' . shell_quote("$combined.txt")
    );
}
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsSlidingWindow/vcfToolsSlidingWindow.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsSlidingWindow/vcfToolsSlidingWindow.xml Fri Jul 10 04:16:17 2015 -0400 |
[ |
@@ -0,0 +1,233 @@ +<tool id="sniplay_vcftoolsslidingwindow" name="VCF tools SlidingWindow" version="1.0.0"> + + <!-- [REQUIRED] Tool description displayed after the tool name --> + <description> Make diversity computation with sliding window </description> + + <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> + <requirements> + <requirement type="binary">perl</requirement> + <requirement type="package" version="0.1.12b">vcftools</requirement> + </requirements> + + <!-- [OPTIONAL] Command to be executed to get the tool's version string --> + <version_command> +<!-- + tool_binary -v +--> + </version_command> + + <!-- [REQUIRED] The command to execute --> + <command interpreter="perl"> + VCFToolsSlidingWindow.pl --input $filein --out $fileout_label --step $step --window $window && mv ${fileout_label}.vcftools.log $filelog && mv ${fileout_label}.Tajima.D ${fileout_taj} && mv ${fileout_label}.TsTv ${fileout_tstv} && mv ${fileout_label}.windowed.pi ${fileout_windowed} && mv ${fileout_label}.snpden ${fileout_snp} + </command> + + <!-- [REQUIRED] Input files and tool parameters --> + <inputs> + <param name="filein" type="data" format="vcf" optional="false" label="VCF input" /> + <param name="fileout_label" type="text" value="snp_density" optional="false" label="Output file basename"/> + <param name="window" type="integer" value="200000" optional="false" label="Window size (in bp)"/> + <param name="step" type="integer" value="50000" optional="false" label="Step size (in bp)"/> + </inputs> + + <!-- [REQUIRED] Output files --> + <outputs> + <data name="fileout_snp" format="tabular" label="${fileout_label}.snpden" /> + <data name="fileout_taj" format="tabular" label="${fileout_label}.Tajima.D" /> + <data name="fileout_tstv" format="tabular" label="${fileout_label}.TsTv" /> + <data name="fileout_windowed" format="tabular" label="${fileout_label}.windowed.pi" /> + <data name="filelog" format="txt" label="${fileout_label}.log" /> + </outputs> 
+ + <!-- [STRONGLY RECOMMENDED] Exit code rules --> + <stdio> + <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> + <exit_code range="1:" level="fatal" /> + </stdio> + + <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> + <tests> + <!-- [HELP] Test files have to be in the ~/test-data directory --> + <test> + <param name="filein" value="sample.vcf" /> + <output name="fileout_annot" file="result.annotation" /> + <output name="fileout_het" file="result.het" /> + <output name="fileout_imiss" file="result.imiss" /> + <output name="fileout_sum" file="result.TsTv.summary" /> + <output name="filelog" file="result.log" /> + </test> + </tests> + + <!-- [OPTIONAL] Help displayed in Galaxy --> + <help> + +.. class:: infomark + +**Authors** Adam Auton, Petr Danecek and Anthony Marcketta (C++ Module) : VCFtools_ + +.. _VCFtools: http://vcftools.sourceforge.net + + | **Please cite** "The Variant Call Format and VCFtools", Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, **Bioinformatics**, 2011 + +.. class:: infomark + +**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr + +--------------------------------------------------- + + +================ +VCF tools filter +================ + +----------- +Description +----------- + + | Compute statistics on VCF file + | For further information on VCFtools, please visit the VCFtools website_. + +.. 
_website: http://vcftools.sourceforge.net + +----------------- +Workflow position +----------------- + +**Upstream tools** + +=========== ========================== ======= +Name output file(s) format +=========== ========================== ======= +=========== ========================== ======= + + +**Downstream tools** + +=========== ========================== ======= +Name output file(s) format +=========== ========================== ======= +=========== ========================== ======= + + +---------- +Input file +---------- + +VCF file + VCF file with all SNPs + +---------- +Parameters +---------- + +Output file basename + Prefix for the output VCF file + +------------ +Output files +------------ + +.snpden file + SNP density along chromosomes (number of variants) + +.Tajima.D file + Tajima's D statistics + +.TsTv file + TsTv (Transition/transversion ratio) +.windowed.pi file + Nucleotide diversity Pi + +.log file + +--------------------------------------------------- + +--------------- +Working example +--------------- + +Input files +=========== + +VCF file +--------- + +:: + + #fileformat=VCFv4.1 + #FILTER=<ID=LowQual,Description="Low quality"> + #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> + [...] + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1 + chr1 2209 . G T 213.84 . 
AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0 + + +Parameters +========== + +Output name -> snp_density + + +Output files +============ + +.snpden file +------------ + +:: + + CHROM BIN_START SNP_COUNT VARIANTS/KB + chr1 0 4955 24.775 + + +.Tajima.D file +-------------- + +:: + + CHROM BIN_START N_SNPS TajimaD + chr1 0 3737 -nan + +.TsTv file +---------- + +:: + + CHROM BinStart SNP_count Ts/Tv + chr1 0 4928 1.88356 + +.windowed.pi file +----------------- + +:: + + CHROM BIN_START BIN_END N_VARIANTS PI + chr1 1 200000 3764 0.01882 + + + </help> + <citations> + <!-- [HELP] As DOI or BibTex entry --> + <citation type="bibtex"> +@article{Danecek01082011, +author = {Danecek, Petr and Auton, Adam and Abecasis, Goncalo and Albers, Cornelis A. and Banks, Eric and DePristo, Mark A. and Handsaker, Robert E. and Lunter, Gerton and Marth, Gabor T. and Sherry, Stephen T. and McVean, Gilean and Durbin, Richard and 1000 Genomes Project Analysis Group}, +title = {The variant call format and VCFtools}, +volume = {27}, +number = {15}, +pages = {2156-2158}, +year = {2011}, +doi = {10.1093/bioinformatics/btr330}, +abstract ={Summary: The variant call format (VCF) is a generic format for storing DNA polymorphism data such as SNPs, insertions, deletions and structural variants, together with rich annotations. VCF is usually stored in a compressed manner and can be indexed for fast data retrieval of variants from a range of positions on the reference genome. The format was developed for the 1000 Genomes Project, and has also been adopted by other projects such as UK10K, dbSNP and the NHLBI Exome Project. 
VCFtools is a software suite that implements various utilities for processing VCF files, including validation, merging, comparing and also provides a general Perl API.Availability: http://vcftools.sourceforge.netContact: rd@sanger.ac.uk}, +URL = {http://bioinformatics.oxfordjournals.org/content/27/15/2156.abstract}, +eprint = {http://bioinformatics.oxfordjournals.org/content/27/15/2156.full.pdf+html}, +journal = {Bioinformatics} +} + </citation> + + </citations> + +</tool> + |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/.svn/entries Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,133 @@ +10 + +dir +41 +svn+ssh://svn.sb-roscoff.fr/svn/logiciel/galaxy/galaxy4sniplay/VCFToolsStats +svn+ssh://svn.sb-roscoff.fr/svn/logiciel/galaxy + + + +2014-12-10T12:42:59.154276Z +41 +gandres + + + + + + + + + + + + + + +0f93037e-e277-4375-988d-e0ab8f9fda44 + +test-data +dir + +vcfToolsStats.sh +file + + + + +2014-12-10T12:23:30.000000Z +86812effc7ac27954d308da7f96a4810 +2014-12-10T12:42:59.154276Z +41 +gandres + + + + + + + + + + + + + + + + + + + + + +536 + +VCFToolsStats.pl +file +42 + + + +2014-12-10T11:11:00.000000Z +78f59ea13bf369ed4aa3c4cfa8f3d0cd +2014-12-10T12:44:35.507960Z +42 +gandres + + + + + + + + + + + + + + + + + + + + + +1469 + +vcfToolsStats.xml +file +51 + + + +2015-02-27T08:43:06.314724Z +48fefcd253d44b0a0d9e23fa82a842fb +2015-02-27T08:46:08.150750Z +51 +gandres + + + + + + + + + + + + + + + + + + + + + +4966 + |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/.svn/text-base/VCFToolsStats.pl.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/.svn/text-base/VCFToolsStats.pl.svn-base Fri Jul 10 04:16:17 2015 -0400 |
[ |
@@ -0,0 +1,71 @@ + +#!/usr/bin/perl + +use strict; +use Switch; +use Getopt::Long; +use Bio::SeqIO; + +my $usage = qq~Usage:$0 <args> [<opts>] + +where <args> are: + + -i, --input <VCF input> + -o, --out <output basename> +~; +$usage .= "\n"; + +my ($input,$out); + +GetOptions( + "input=s" => \$input, + "out=s" => \$out +); + + +die $usage + if ( !$input); + + + +my $nb_gene = `grep -c mRNA $input`; +$nb_gene =~s/\n//g; +my $nb_intergenic = `grep -c INTERGENIC $input`; +$nb_intergenic =~s/\n//g; + +my $nb_intron = `grep -c INTRON $input`; +$nb_intron =~s/\n//g; +my $nb_UTR = `grep -c UTR $input`; +$nb_UTR =~s/\n//g; +my $nb_exon = $nb_gene - $nb_intron - $nb_UTR; + +my $nb_ns = `grep -c NON_SYNONYMOUS_CODING $input`; +$nb_ns =~s/\n//g; +my $nb_s = $nb_exon - $nb_ns; + + + + +#system("$VCFTOOLS_EXE --vcf $input --remove-filtered-all --out $out --hardy >>vcftools.log 2>&1"); +system("vcftools --vcf $input --remove-filtered-all --out $out --het >>vcftools.log 2>&1"); +system("vcftools --vcf $input --remove-filtered-all --out $out --TsTv-summary >>vcftools.log 2>&1"); +system("vcftools --vcf $input --remove-filtered-all --out $out --missing-indv >>vcftools.log 2>&1"); + +open(my $G,">$out.annotation"); +print $G "Genic $nb_gene\n"; +print $G "Intergenic $nb_intergenic\n"; +print $G "========\n"; +print $G "Intron $nb_intron\n"; +print $G "Exon $nb_exon\n"; +print $G "UTR $nb_UTR\n"; +print $G "========\n"; +print $G "Non-syn $nb_ns\n"; +print $G "Synonym $nb_s\n"; +close($G); + + + + + + + |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/.svn/text-base/vcfToolsStats.sh.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/.svn/text-base/vcfToolsStats.sh.svn-base Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,23 @@ +#!/bin/bash + +tool_path=$(dirname $0) + +filein=$1 +fileout_label=$2 +fileout_annot=$3 +fileout_het=$4 +fileout_imiss=$5 +fileout_sum=$6 +filelog=$7 + + + +perl $tool_path/VCFToolsStats.pl --input $filein --out $fileout_label + +cp $fileout_label.annotation $fileout_annot ; rm $fileout_label.annotation +cp $fileout_label.het $fileout_het ; rm $fileout_label.het +cp $fileout_label.imiss $fileout_imiss ; rm $fileout_label.imiss +cp $fileout_label.TsTv.summary $fileout_sum ; rm $fileout_label.TsTv.summary + +cp vcftools.log $filelog +rm vcftools.log |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/.svn/text-base/vcfToolsStats.xml.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/.svn/text-base/vcfToolsStats.xml.svn-base Fri Jul 10 04:16:17 2015 -0400 |
[ |
@@ -0,0 +1,214 @@ +<tool id="sniplay_vcftoolsstats" name="VCF tools Stats" version="1.0.0"> + + <!-- [REQUIRED] Tool description displayed after the tool name --> + <description> </description> + + <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> + <requirements> + <requirement type="binary">perl</requirement> + <requirement type="package" version="0.1.12b">vcftools</requirement> + </requirements> + + <!-- [OPTIONAL] Command to be executed to get the tool's version string --> + <version_command> +<!-- + tool_binary -v +--> + </version_command> + + <!-- [REQUIRED] The command to execute --> + <command interpreter="perl"> + vcfToolsStats.sh $filein $fileout_label $fileout_annot $fileout_het $fileout_imiss $fileout_sum $filelog + </command> + + <!-- [REQUIRED] Input files and tool parameters --> + <inputs> + <param name="filein" type="data" format="vcf" optional="false" label="VCF input" /> + <param name="fileout_label" type="text" value="vcf_stats" optional="false" label="Output file basename"/> + </inputs> + + <!-- [REQUIRED] Output files --> + <outputs> + <data name="fileout_annot" format="txt" label="${fileout_label}.annotation" /> + <data name="fileout_het" format="txt" label="${fileout_label}.het" /> + <data name="fileout_imiss" format="txt" label="${fileout_label}.imiss" /> + <data name="fileout_sum" format="txt" label="${fileout_label}.TsTv.summary" /> + <data name="filelog" format="txt" label="${fileout_label}.log" /> + </outputs> + + <!-- [STRONGLY RECOMMANDED] Exit code rules --> + <stdio> + <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> + <exit_code range="1:" level="fatal" /> + </stdio> + + <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> + <tests> + <!-- [HELP] Test files have to be in the ~/test-data directory --> + <test> + <param name="filein" value="sample.vcf" /> + <output name="fileout_annot" file="result.annotation" /> + <output 
name="fileout_het" file="result.het" /> + <output name="fileout_imiss" file="result.imiss" /> + <output name="fileout_sum" file="result.TsTv.summary" /> + <output name="filelog" file="result.log" /> + </test> + </tests> + + <!-- [OPTIONAL] Help displayed in Galaxy --> + <help> + +.. class:: infomark + +**Authors** + +--------------------------------------------------- + +.. class:: infomark + +**Please cite** If you use this tool, please cite Dereeper et al. 2015 in prep. + +--------------------------------------------------- + +================ +VCF tools filter +================ + +----------- +Description +----------- + + Compute statistics on VCF file + +----------------- +Workflow position +----------------- + +**Upstream tools** + +=========== ========================== ======= +Name output file(s) format +=========== ========================== ======= +=========== ========================== ======= + + +**Downstream tools** + +=========== ========================== ======= +Name output file(s) format +=========== ========================== ======= +=========== ========================== ======= + + +---------- +Input file +---------- + +VCF file + VCF file with all SNPs + +---------- +Parameters +---------- + +Output file basename + Prefix for the output VCF file + +------------ +Output files +------------ + +.annotation file + Statistics on annotation/location along genome + +.het file + Statistics on heterozygosity of the individuals + +.imiss + Statistics on missing data of the inidividuals +.TsTv.summary + Statistics on mutation types and transition/transvertion number + +.log file + +--------------------------------------------------- + +--------------- +Working example +--------------- + +Input files +=========== + +VCF file +--------- + +:: + + #fileformat=VCFv4.1 + #FILTER=<ID=LowQual,Description="Low quality"> + #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> + [...] 
+ CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1 + chr1 2209 . G T 213.84 . AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0 + + +Parameters +========== + +Output name -> vcf_stat + + +Output files +============ + +.annotation file +---------------- + +:: + + Genic 4489 + Intergenic 466 + ======== + Intron 960 + Exon 3248 + UTR 281 + ======== + Non-syn 226 + Synonym 3022 + +.het file +--------- + +:: + + INDV O(HOM) E(HOM) N_SITES F + CATB1 0 0.0 3616 0.00000 + +.imiss file +----------- + +:: + + INDV N_DATA N_GENOTYPES_FILTERED N_MISS F_MISS + CATB1 4813 0 0 0 + +.TsTv.summary file +------------------ + +:: + + MODEL COUNT + AC 371 + AG 1467 + AT 562 + CG 330 + CT 1659 + GT 397 + Ts 3126 + Tv 1660 + + + </help> + +</tool> |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/VCFToolsStats.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/VCFToolsStats.pl Fri Jul 10 04:16:17 2015 -0400 |
[ |
#!/usr/bin/perl

# VCFToolsStats.pl
#
# Purpose: produce summary statistics for an annotated VCF file.
#   - Counts the lines of the input that mention each annotation keyword
#     (mRNA, INTERGENIC, INTRON, UTR, NON_SYNONYMOUS_CODING) and derives
#     exon and synonymous counts by subtraction; the result is written to
#     "<out>.annotation".
#   - Runs vcftools three times (--het, --TsTv-summary, --missing-indv) with
#     --remove-filtered-all, writing under the --out basename and appending
#     the console output to "vcftools.log" in the current directory.
#
# Exit status: 0 when all vcftools runs succeeded, 1 otherwise (the
# annotation file is still written in that case).
#
# The Switch and Bio::SeqIO modules loaded by the previous revision were
# never used; both are non-core, so loading them made the script fail on
# hosts providing only perl and vcftools.

use strict;
use warnings;
use Getopt::Long;

my $usage = qq~Usage:$0 <args> [<opts>]

where <args> are:

    -i, --input          <VCF input>
    -o, --out            <output basename>
~;
$usage .= "\n";

my ($input, $out);

GetOptions(
    "input=s" => \$input,
    "out=s"   => \$out,
) or die $usage;

# The output basename is needed for every generated file name.
die $usage
    if !defined $input || $input eq '' || !defined $out || $out eq '';

# Count, in a single pass, the lines containing each keyword. This matches
# what "grep -c KEYWORD file" reported (header lines included), without
# spawning a shell per keyword and with an explicit error when the file
# cannot be read.
my %lines_with = map { $_ => 0 } qw(mRNA INTERGENIC INTRON UTR NON_SYNONYMOUS_CODING);
open(my $vcf, '<', $input) or die "Cannot open $input: $!\n";
while (my $line = <$vcf>) {
    for my $keyword (keys %lines_with) {
        $lines_with{$keyword}++ if index($line, $keyword) >= 0;
    }
}
close($vcf);

my $nb_gene       = $lines_with{mRNA};
my $nb_intergenic = $lines_with{INTERGENIC};
my $nb_intron     = $lines_with{INTRON};
my $nb_UTR        = $lines_with{UTR};
my $nb_exon       = $nb_gene - $nb_intron - $nb_UTR;
my $nb_ns         = $lines_with{NON_SYNONYMOUS_CODING};
my $nb_s          = $nb_exon - $nb_ns;

# Run the three vcftools reports; paths are single-quoted for the shell.
my $failures = 0;
for my $report ('--het', '--TsTv-summary', '--missing-indv') {
    my $cmd = join(' ',
        'vcftools',
        '--vcf', shell_quote($input),
        '--remove-filtered-all',
        '--out', shell_quote($out),
        $report,
        '>>vcftools.log', '2>&1');
    if (system($cmd) != 0) {
        warn "Warning: vcftools $report failed (status $?), see vcftools.log\n";
        $failures++;
    }
}

# NOTE(review): label and value are separated by a single character; it is
# written here as a space, as rendered in the reviewed source. Confirm
# against test-data/result.annotation whether it should be a tab.
my $annotation_file = "$out.annotation";
open(my $G, '>', $annotation_file) or die "Cannot write $annotation_file: $!\n";
print $G "Genic $nb_gene\n";
print $G "Intergenic $nb_intergenic\n";
print $G "========\n";
print $G "Intron $nb_intron\n";
print $G "Exon $nb_exon\n";
print $G "UTR $nb_UTR\n";
print $G "========\n";
print $G "Non-syn $nb_ns\n";
print $G "Synonym $nb_s\n";
close($G) or die "Cannot close $annotation_file: $!\n";

exit($failures ? 1 : 0);


# Wraps a string in single quotes so the shell passes it through verbatim.
sub shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/test-data/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/.svn/entries Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,232 @@ +10 + +dir +41 +svn+ssh://svn.sb-roscoff.fr/svn/logiciel/galaxy/galaxy4sniplay/VCFToolsStats/test-data +svn+ssh://svn.sb-roscoff.fr/svn/logiciel/galaxy + + + +2014-12-10T12:42:59.154276Z +41 +gandres + + + + + + + + + + + + + + +0f93037e-e277-4375-988d-e0ab8f9fda44 + +result.imiss +file +42 + + + +2014-12-10T12:26:44.000000Z +7dad9ffbe4c81d943dcffaac521f6d0d +2014-12-10T12:44:35.507960Z +42 +gandres + + + + + + + + + + + + + + + + + + + + + +64 + +result.annotation +file +42 + + + +2014-12-10T12:27:40.000000Z +cf07d104e04184b86115ab9dfcdd96e2 +2014-12-10T12:44:35.507960Z +42 +gandres + + + + + + + + + + + + + + + + + + + + + +98 + +result.TsTv.summary +file +42 + + + +2014-12-10T12:26:06.000000Z +7d34dffbfb7bffdebf1cc8bf9c0f1f3a +2014-12-10T12:44:35.507960Z +42 +gandres + + + + + + + + + + + + + + + + + + + + + +72 + +result.het +file +42 + + + +2014-12-10T12:27:04.000000Z +68a3973a970a8af78f4e01f5577e7dd1 +2014-12-10T12:44:35.507960Z +42 +gandres + + + + + + + + + + + + + + + + + + + + + +54 + +sample.vcf +file + + + + +2014-11-21T15:11:18.000000Z +d9c26255bcde7cb69d009f7eca1870cd +2014-12-10T12:42:59.154276Z +41 +gandres + + + + + + + + + + + + + + + + + + + + + +1622326 + +result.log +file +42 + + + +2014-12-10T12:25:25.000000Z +65e309b9a7b5c0f10b9e1bfc8f0d61bd +2014-12-10T12:44:35.507960Z +42 +gandres + + + + + + + + + + + + + + + + + + + + + +1205 + |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/test-data/.svn/text-base/result.TsTv.summary.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/.svn/text-base/result.TsTv.summary.svn-base Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,9 @@ +MODEL COUNT +AC 371 +AG 1467 +AT 562 +CG 330 +CT 1659 +GT 397 +Ts 3126 +Tv 1660 |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/test-data/.svn/text-base/result.annotation.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/.svn/text-base/result.annotation.svn-base Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,9 @@ +Genic 4489 +Intergenic 466 +======== +Intron 960 +Exon 3248 +UTR 281 +======== +Non-syn 226 +Synonym 3022 |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/test-data/.svn/text-base/result.het.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/.svn/text-base/result.het.svn-base Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,2 @@ +INDV O(HOM) E(HOM) N_SITES F +CATB1 0 0.0 3616 0.00000 |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/test-data/.svn/text-base/result.imiss.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/.svn/text-base/result.imiss.svn-base Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,2 @@ +INDV N_DATA N_GENOTYPES_FILTERED N_MISS F_MISS +CATB1 4813 0 0 0 |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/test-data/.svn/text-base/result.log.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/.svn/text-base/result.log.svn-base Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,44 @@ + +VCFtools - v0.1.12b +(C) Adam Auton and Anthony Marcketta 2009 + +Parameters as interpreted: + --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat + --het + --out vcf_stats + --remove-filtered-all + +After filtering, kept 1 out of 1 Individuals +Outputting Individual Heterozygosity + Individual Heterozygosity: Only using biallelic SNPs. +After filtering, kept 4813 out of a possible 4955 Sites +Run Time = 0.00 seconds + +VCFtools - v0.1.12b +(C) Adam Auton and Anthony Marcketta 2009 + +Parameters as interpreted: + --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat + --out vcf_stats + --TsTv-summary + --remove-filtered-all + +After filtering, kept 1 out of 1 Individuals +Outputting Ts/Tv summary +Ts/Tv ratio: 1.883 +After filtering, kept 4813 out of a possible 4955 Sites +Run Time = 0.00 seconds + +VCFtools - v0.1.12b +(C) Adam Auton and Anthony Marcketta 2009 + +Parameters as interpreted: + --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat + --missing-indv + --out vcf_stats + --remove-filtered-all + +After filtering, kept 1 out of 1 Individuals +Outputting Individual Missingness +After filtering, kept 4813 out of a possible 4955 Sites +Run Time = 0.00 seconds |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/test-data/.svn/text-base/sample.vcf.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/.svn/text-base/sample.vcf.svn-base Fri Jul 10 04:16:17 2015 -0400 |
[ |
b'@@ -0,0 +1,5000 @@\n+##fileformat=VCFv4.1\n+##FILTER=<ID=LowQual,Description="Low quality">\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n+##INFO=<ID=AF,Number=A,Type'..b'm=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n+chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n+chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n+chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n+chr1\t188357\t.\tC\tT\t1327.77\t.\tAC=2;AF=1.00;AN=2;DP=36;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=24.46;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t1/1:0,36:36:99:1356,105,0\n+chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n+chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n+chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n+chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n+chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n+chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n' |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/test-data/result.TsTv.summary --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/result.TsTv.summary Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,9 @@ +MODEL COUNT +AC 371 +AG 1467 +AT 562 +CG 330 +CT 1659 +GT 397 +Ts 3126 +Tv 1660 |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/test-data/result.annotation --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/result.annotation Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,9 @@ +Genic 4489 +Intergenic 466 +======== +Intron 960 +Exon 3248 +UTR 281 +======== +Non-syn 226 +Synonym 3022 |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/test-data/result.het --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/result.het Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,2 @@ +INDV O(HOM) E(HOM) N_SITES F +CATB1 0 0.0 3616 0.00000 |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/test-data/result.imiss --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/result.imiss Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,2 @@ +INDV N_DATA N_GENOTYPES_FILTERED N_MISS F_MISS +CATB1 4813 0 0 0 |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/test-data/result.log --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/result.log Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,44 @@ + +VCFtools - v0.1.12b +(C) Adam Auton and Anthony Marcketta 2009 + +Parameters as interpreted: + --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat + --het + --out vcf_stats + --remove-filtered-all + +After filtering, kept 1 out of 1 Individuals +Outputting Individual Heterozygosity + Individual Heterozygosity: Only using biallelic SNPs. +After filtering, kept 4813 out of a possible 4955 Sites +Run Time = 0.00 seconds + +VCFtools - v0.1.12b +(C) Adam Auton and Anthony Marcketta 2009 + +Parameters as interpreted: + --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat + --out vcf_stats + --TsTv-summary + --remove-filtered-all + +After filtering, kept 1 out of 1 Individuals +Outputting Ts/Tv summary +Ts/Tv ratio: 1.883 +After filtering, kept 4813 out of a possible 4955 Sites +Run Time = 0.00 seconds + +VCFtools - v0.1.12b +(C) Adam Auton and Anthony Marcketta 2009 + +Parameters as interpreted: + --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat + --missing-indv + --out vcf_stats + --remove-filtered-all + +After filtering, kept 1 out of 1 Individuals +Outputting Individual Missingness +After filtering, kept 4813 out of a possible 4955 Sites +Run Time = 0.00 seconds |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/test-data/sample.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/sample.vcf Fri Jul 10 04:16:17 2015 -0400 |
[ |
b'@@ -0,0 +1,5000 @@\n+##fileformat=VCFv4.1\n+##FILTER=<ID=LowQual,Description="Low quality">\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n+##INFO=<ID=AF,Number=A,Type'..b'm=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n+chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n+chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n+chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n+chr1\t188357\t.\tC\tT\t1327.77\t.\tAC=2;AF=1.00;AN=2;DP=36;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=24.46;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t1/1:0,36:36:99:1356,105,0\n+chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n+chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n+chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n+chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n+chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n+chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n' |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/vcfToolsStats.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/vcfToolsStats.sh Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,23 @@ +#!/bin/bash + +tool_path=$(dirname $0) + +filein=$1 +fileout_label=$2 +fileout_annot=$3 +fileout_het=$4 +fileout_imiss=$5 +fileout_sum=$6 +filelog=$7 + + + +perl $tool_path/VCFToolsStats.pl --input $filein --out $fileout_label + +cp $fileout_label.annotation $fileout_annot ; rm $fileout_label.annotation +cp $fileout_label.het $fileout_het ; rm $fileout_label.het +cp $fileout_label.imiss $fileout_imiss ; rm $fileout_label.imiss +cp $fileout_label.TsTv.summary $fileout_sum ; rm $fileout_label.TsTv.summary + +cp vcftools.log $filelog +rm vcftools.log |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 VCFToolsStats/vcfToolsStats.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/vcfToolsStats.xml Fri Jul 10 04:16:17 2015 -0400 |
[ |
@@ -0,0 +1,243 @@ +<tool id="sniplay_vcftoolsstats" name="VCFtools Stats" version="1.0.0"> + + <!-- [REQUIRED] Tool description displayed after the tool name --> + <description> </description> + + <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> + <requirements> + <requirement type="binary">perl</requirement> + <requirement type="package" version="0.1.12b">vcftools</requirement> + </requirements> + + <!-- [OPTIONAL] Command to be executed to get the tool's version string --> + <version_command> +<!-- + tool_binary -v +--> + </version_command> + + <!-- [REQUIRED] The command to execute --> + <command interpreter="perl"> + vcfToolsStats.sh $filein $fileout_label $fileout_annot $fileout_het $fileout_imiss $fileout_sum $filelog + </command> + + <!-- [REQUIRED] Input files and tool parameters --> + <inputs> + <param name="filein" type="data" format="vcf" optional="false" label="VCF input" /> + <param name="fileout_label" type="text" value="vcf_stats" optional="false" label="Output file basename"/> + </inputs> + + <!-- [REQUIRED] Output files --> + <outputs> + <data name="fileout_annot" format="txt" label="${fileout_label}.annotation" /> + <data name="fileout_het" format="txt" label="${fileout_label}.het" /> + <data name="fileout_imiss" format="txt" label="${fileout_label}.imiss" /> + <data name="fileout_sum" format="txt" label="${fileout_label}.TsTv.summary" /> + <data name="filelog" format="txt" label="${fileout_label}.log" /> + </outputs> + + <!-- [STRONGLY RECOMMANDED] Exit code rules --> + <stdio> + <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> + <exit_code range="1:" level="fatal" /> + </stdio> + + <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> + <tests> + <!-- [HELP] Test files have to be in the ~/test-data directory --> + <test> + <param name="filein" value="sample.vcf" /> + <output name="fileout_annot" file="result.annotation" /> + <output 
name="fileout_het" file="result.het" /> + <output name="fileout_imiss" file="result.imiss" /> + <output name="fileout_sum" file="result.TsTv.summary" /> + <output name="filelog" file="result.log" /> + </test> + </tests> + + <!-- [OPTIONAL] Help displayed in Galaxy --> + <help> + +.. class:: infomark + +**Authors** Adam Auton, Petr Danecek and Anthony Marcketta (C++ Module) : VCFtools_ + +.. _VCFtools: http://vcftools.sourceforge.net + + | **Please cite** "The Variant Call Format and VCFtools", Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, **Bioinformatics**, 2011 + +.. class:: infomark + +**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr + +--------------------------------------------------- + + +================ +VCF tools filter +================ + +----------- +Description +----------- + + | Compute statistics on VCF file + | For further informations on VCFtools, please visite the VCFtools website_. + +.. 
_website: http://vcftools.sourceforge.net + +----------------- +Workflow position +----------------- + +**Upstream tools** + +=========== ========================== ======= +Name output file(s) format +=========== ========================== ======= +=========== ========================== ======= + + +**Downstream tools** + +=========== ========================== ======= +Name output file(s) format +=========== ========================== ======= +=========== ========================== ======= + + +---------- +Input file +---------- + +VCF file + VCF file with all SNPs + +---------- +Parameters +---------- + +Output file basename + Prefix for the output VCF file + +------------ +Output files +------------ + +.annotation file + Statistics on annotation/location along genome + +.het file + Statistics on heterozygosity of the individuals + +.imiss + Statistics on missing data of the inidividuals +.TsTv.summary + Statistics on mutation types and transition/transvertion number + +.log file + +--------------------------------------------------- + +--------------- +Working example +--------------- + +Input files +=========== + +VCF file +--------- + +:: + + #fileformat=VCFv4.1 + #FILTER=<ID=LowQual,Description="Low quality"> + #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> + [...] + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1 + chr1 2209 . G T 213.84 . 
AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0 + + +Parameters +========== + +Output name -> vcf_stat + + +Output files +============ + +.annotation file +---------------- + +:: + + Genic 4489 + Intergenic 466 + ======== + Intron 960 + Exon 3248 + UTR 281 + ======== + Non-syn 226 + Synonym 3022 + +.het file +--------- + +:: + + INDV O(HOM) E(HOM) N_SITES F + CATB1 0 0.0 3616 0.00000 + +.imiss file +----------- + +:: + + INDV N_DATA N_GENOTYPES_FILTERED N_MISS F_MISS + CATB1 4813 0 0 0 + +.TsTv.summary file +------------------ + +:: + + MODEL COUNT + AC 371 + AG 1467 + AT 562 + CG 330 + CT 1659 + GT 397 + Ts 3126 + Tv 1660 + + + </help> + <citations> + <!-- [HELP] As DOI or BibTex entry --> + <citation type="bibtex"> + @article{Danecek01082011, + author = {Danecek, Petr and Auton, Adam and Abecasis, Goncalo and Albers, Cornelis A. and Banks, Eric and DePristo, Mark A. and Handsaker, Robert E. and Lunter, Gerton and Marth, Gabor T. and Sherry, Stephen T. and McVean, Gilean and Durbin, Richard and 1000 Genomes Project Analysis Group}, + title = {The variant call format and VCFtools}, + volume = {27}, + number = {15}, + pages = {2156-2158}, + year = {2011}, + doi = {10.1093/bioinformatics/btr330}, + abstract ={Summary: The variant call format (VCF) is a generic format for storing DNA polymorphism data such as SNPs, insertions, deletions and structural variants, together with rich annotations. VCF is usually stored in a compressed manner and can be indexed for fast data retrieval of variants from a range of positions on the reference genome. The format was developed for the 1000 Genomes Project, and has also been adopted by other projects such as UK10K, dbSNP and the NHLBI Exome Project. 
VCFtools is a software suite that implements various utilities for processing VCF files, including validation, merging, comparing and also provides a general Perl API.Availability: http://vcftools.sourceforge.netContact: rd@sanger.ac.uk}, + URL = {http://bioinformatics.oxfordjournals.org/content/27/15/2156.abstract}, + eprint = {http://bioinformatics.oxfordjournals.org/content/27/15/2156.full.pdf+html}, + journal = {Bioinformatics} + } + </citation> + + </citations> + +</tool> |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Jul 10 04:16:17 2015 -0400 |
b |
@@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="vcftools" version="0.1.12b"> + <repository changeset_revision="a655cb1dfc58" name="package_vcftools_0_1_12b" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu/" /> + </package> +</tool_dependency> |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolFilter/VCFToolsFilter.pl --- a/vcftools_main/VCFToolFilter/VCFToolsFilter.pl Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,158 +0,0 @@ - -#!/usr/bin/perl - -use strict; -use Switch; -use Getopt::Long; -use Bio::SeqIO; - -my $usage = qq~Usage:$0 <args> [<opts>] - -where <args> are: - - -i, --input <VCF input> - -o, --out <Output basename> - - <opts> are: - - -s, --samples <Samples to be analyzed. Comma separated list> - -c, --chromosomes <Chromosomes to be analyzed. Comma separated list> - -e, --export <Output format (VCF/freq/plink. Default: VCF> - -f, --frequency <Minimum MAF. Default: 0.001> - -m, --max_freq <Maximum MAF. Default: 0.5> - -a, --allow_missing <Allowed missing data proportion per site. Must be comprised between 0 and 1. Default: 1> - -n, --nb_alleles <Accepted number of alleles (min,max). Default: 2,4> - -t, --type <Type of polymorphisms to keep (ALL/SNP/INDEL). Default: ALL> - -b, --bounds <Lower bound and upper bound for a range of sites to be processed (start,end). Default: 1, 100000000> -~; -$usage .= "\n"; - -my ($input,$out); - - -#my $indel_size_max = 500; -#my $indel_size_min = 1; -my $frequency_max = 0.5; -my $frequency_min = 0.001; -my $pos_max = 100000000000; -my $pos_min = 0; -my $filter_snp_type = "all"; - -my $missing_data = 1; -my $export = "VCF"; -my $type = "ALL"; -my $nb_alleles; -my $bounds; -my $samples; -my $chromosomes; - -GetOptions( - "input=s" => \$input, - "out=s" => \$out, - "samples=s" => \$samples, - "chromosomes=s" => \$chromosomes, - "frequency=s" => \$frequency_min, - "max_freq=s" => \$frequency_max, - "allow_missing=s"=> \$missing_data, - "export=s" => \$export, - "type=s" => \$type, - "nb_alleles=s" => \$nb_alleles, - "bounds=s" => \$bounds, -); - - -die $usage - if ( !$input || !$out); - - -my @dnasamples; -if ($samples) -{ - @dnasamples = split(",",$samples); -} -my @nalleles; -if ($nb_alleles) -{ - @nalleles = split(",",$nb_alleles); -} -my @boundaries; -if ($bounds) -{ - @boundaries = split(",",$bounds); -} -my @chromosomes_list; -if ($chromosomes) -{ - @chromosomes_list = split(",",$chromosomes); -} - - -my $experiment = 
"chromosomes"; -my $table = ""; -my %genes; -my @snp_ids; -my @snp_ids_and_positions; -my @snp_ids_and_positions_all; -my $gene; -my $snp_num = 0; -my %ref_sequences; -my %snps_of_gene; - - - - -my $indiv_cmd = ""; -if (@dnasamples) -{ - $indiv_cmd = "--indv " . join(" --indv ",@dnasamples); -} - -my $chrom_cmd = ""; -if (@chromosomes_list) -{ - $chrom_cmd = "--chr " . join(" --chr ",@chromosomes_list); -} - -my $export_cmd = "--recode"; -if ($export eq "freq") -{ - $export_cmd = "--freq"; -} -if ($export eq "plink") -{ - $export_cmd = "--plink"; -} - - - -my $nb_alleles_cmd = "--min-alleles 1 --max-alleles 4"; -if (@nalleles) -{ - $nb_alleles_cmd = "--min-alleles $nalleles[0] --max-alleles $nalleles[1]"; -} -my $bounds_cmd = "--from-bp 1 --to-bp 100000000"; -if (@boundaries) -{ - $bounds_cmd = "--from-bp $boundaries[0] --to-bp $boundaries[1]"; -} - - -my $type_cmd = ""; -if ($type eq "INDEL") -{ - $type_cmd = "--keep-only-indels"; -} -if ($type eq "SNP") -{ - $type_cmd = "--remove-indels"; -} - - -system("vcftools --vcf $input --out $out --keep-INFO-all --remove-filtered-all $type_cmd $export_cmd $chrom_cmd $indiv_cmd $nb_alleles_cmd --maf $frequency_min --max-maf $frequency_max --max-missing $missing_data >>vcftools.log 2>&1"); - - - - - - - |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolFilter/test-data/result.log --- a/vcftools_main/VCFToolFilter/test-data/result.log Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,21 +0,0 @@ - -VCFtools - v0.1.12b -(C) Adam Auton and Anthony Marcketta 2009 - -Parameters as interpreted: - --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat - --chr chr1 - --recode-INFO-all - --maf 0.001 - --max-alleles 4 - --max-maf 0.5 - --min-alleles 2 - --max-missing 1 - --out filtered - --recode - --remove-filtered-all - -After filtering, kept 1 out of 1 Individuals -Outputting VCF file... -After filtering, kept 3616 out of a possible 4955 Sites -Run Time = 0.00 seconds |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolFilter/test-data/result.vcf --- a/vcftools_main/VCFToolFilter/test-data/result.vcf Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,3661 +0,0 @@\n-##fileformat=VCFv4.1\n-##FILTER=<ID=LowQual,Description="Low quality">\n-##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n-##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n-##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n-##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n-##INFO=<ID=AF,Number=A,Type'..b'0012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:30,25:55:99:802,0,993\n-chr1\t188173\t.\tG\tA\t697.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.617;DP=42;Dels=0.00;FS=2.786;HaplotypeScore=1.9991;MLEAC=1;MLEAF=0.500;MQ=59.09;MQ0=0;MQRankSum=-0.013;QD=16.61;ReadPosRankSum=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n-chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n-chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n-chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n-chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n-chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n-chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n-chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n-chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n-chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n' |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolFilter/test-data/sample.vcf --- a/vcftools_main/VCFToolFilter/test-data/sample.vcf Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,5000 +0,0 @@\n-##fileformat=VCFv4.1\n-##FILTER=<ID=LowQual,Description="Low quality">\n-##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n-##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n-##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n-##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n-##INFO=<ID=AF,Number=A,Type'..b'm=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n-chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n-chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n-chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n-chr1\t188357\t.\tC\tT\t1327.77\t.\tAC=2;AF=1.00;AN=2;DP=36;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=24.46;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t1/1:0,36:36:99:1356,105,0\n-chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n-chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n-chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n-chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n-chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n-chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n' |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolFilter/vcfToolsFilter.sh --- a/vcftools_main/VCFToolFilter/vcfToolsFilter.sh Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,46 +0,0 @@ -#!/bin/bash - -tool_path=$(dirname $0) - -filein=$1 -fileout_label=$2 -fileout=$3 -filelog=$4 -export=$5 -frequency=$6 -max_freq=$7 -allow_missing=$8 -nb_alleles_min=$9 -nb_alleles_max=${10} -type=${11} -bound_start=${12} -bound_end=${13} - - -if [ "${14}" != "None" ] -then samples="--samples ${14}" -fi - -if [ "${15}" != "None" ] -then chromosomes="--chromosomes ${15}" -fi - -if [ "$bound_start" -gt "$bound_end" ] -then tmp=$bound_start ; bound_start=$bound_end ; bound_end=$tmp ; echo "Warning : Lower bound must be lower than greater bound!" >&2 -fi - -if [ "$nb_alleles_min" -gt "$nb_alleles_max" ] -then tmp=$nb_alleles_min ; nb_alleles_min=$nb_alleles_max ; nb_alleles_max=$tmp ; echo "Warning : Minimum number of alleles must be lower than maximum number of allele!" >&2 -fi - -perl $tool_path/VCFToolsFilter.pl --input $filein --out $fileout_label --export $export --frequency $frequency --max_freq $max_freq --allow_missing $allow_missing --nb_alleles $nb_alleles_min','$nb_alleles_max --type $type --bounds $bound_start','$bound_end $samples $chromosomes - -if [ "$export" = "VCF" ] -then cp $fileout_label.recode.vcf $fileout ; rm $fileout_label.recode.vcf -elif [ "$export" = "freq" ] -then cp $fileout_label.frq $fileout ; rm $fileout_label.frq -else cp $fileout_label.ped $fileout; cp $fileout_label.map ${16} ; rm $fileout_label.ped $fileout_label.map -fi - -cp vcftools.log $filelog -rm vcftools.log |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolFilter/vcfToolsFilter.xml --- a/vcftools_main/VCFToolFilter/vcfToolsFilter.xml Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,276 +0,0 @@\n-<tool id="sniplay_vcftoolsfilter" name="VCFtools Filter" version="1.1.1">\n- \n- <!-- [REQUIRED] Tool description displayed after the tool name -->\n- <description> </description>\n- \n- <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->\n- <requirements>\n- <requirement type="binary">perl</requirement>\n-\t<requirement type="package" version="0.1.12b">vcftools</requirement>\n- </requirements>\n- \n- <!-- [OPTIONAL] Command to be executed to get the tool\'s version string -->\n- <version_command>\n-<!--\n- tool_binary -v\n--->\n- </version_command>\n- \n- <!-- [REQUIRED] The command to execute -->\n- <command interpreter="perl">\n-\tvcfToolsFilter.sh $filein $fileout_label $fileout $filelog $export $frequency $max_freq $allow_missing $nb_alleles_min $nb_alleles_max $type_p $bound_start $bound_end\n-\t#if str( $samples ) == "":\n-\t\'None\'\n-\t#else\n-\t$samples\n-\t#end if\n-\t#if str( $chromosomes ) == "":\n-\t\'None\'\n-\t#else\n-\t$chromosomes\n-\t#end if\n-\t#if str( $export ) == "plink":\n-\t$fileout_map\n-\t#else\n-\t\'\'\n-\t#end if\n- </command>\n- \n- <!-- [REQUIRED] Input files and tool parameters -->\n- <inputs>\n-\t<param name="filein" type="data" format="vcf" optional="false" label="VCF input" />\n-\t<param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename"/>\n-\t<param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">\n-\t\t<validator type="regex" message="Please enter a comma separated list.">^\\w+(,\\w+)*$</validator>\n-\t</param>\n-\t<param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. 
Comma separated list">\n-\t <validator type="regex" message="Please enter a comma separated list.">^\\w+(,\\w+)*$</validator>\n- </param>\n-\t<param name="export" type="select" label="Output format" >\n-\t <option value="VCF" selected="true">VCF</option>\n-\t <option value="freq">freq</option>\n- <option value="plink">plink</option>\n- </param>\n-\t<param name="frequency" type="float" value="0.001" label="Minimum MAF." help="Minimum frequency." />\n-\t<param name="max_freq" type="float" value="0.5" label="Maximum MAF." help="Maximum frequency." />\n-\t<param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />\n-\t<param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />\n-\t<param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />\n- <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep." >\n- <option value="ALL" selected="true">All</option>\n- <option value="SNP">SNP</option>\n- <option value="INDEL">Indel</option>\n- </param>\n-\t<param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />\n-\t<param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." 
/>\n- </inputs>\n- \n- <!-- [REQUIRED] Output files -->\n- <outputs>\n-\t<data name="fileout" format="vcf" label="${fileout_label}.#if str($export)==\'plink\' then \'ped\' else \'\' # #if str($export)==\'freq\' then \'frq\' else \'\' # #if str($export)==\'VCF\' then \'vcf\' else \'\' #" >\n-\t\t<change_format>\n- \t<when input="export" value="freq" format="tabular" />\n-\t\t\t<when input="export" value="plink" format="txt" />\n-\t\t</change_format>\t\n-\t</data>\n-\t<data name="fileout_map" format="txt" label="${fileout_label}.map">\n-\t\t<filter>(export == \'plink\')</filter>\n-\t</data>\n-\t<data name="filelog" format="txt" label="${fileout_label}.log" />\n'..b': .. _VCFtools: http://vcftools.sourceforge.net\n-\n- | ** Please cite ** "The Variant Call Format and VCFtools", Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, ** Bioinformatics **, 2011 \n-\n-.. class:: infomark\n-\n-**Galaxy integration** Andres Gwendoline, Institut Fran\xc3\xa7ais de Bioinformatique.\n-\n-.. class:: infomark\n-\n-**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr\n-\n----------------------------------------------------\n-\n-\n-\n-================\n-VCF tools filter\n-================\n-\n------------\n-Description\n------------\n-\n- | Filter VCF file \n- | For further informations on VCFtools, please visite the VCFtools website.\r\n- | .. 
_VCFtools: http://vcftools.sourceforge.net\n-\n------------------\n-Workflow position\n------------------\n-\n-**Upstream tools**\n-\n-=========== ========================== =======\n-Name output file(s) format \n-=========== ========================== =======\n-=========== ========================== =======\n-\n-\n-**Downstream tools**\n-\n-=========== ========================== =======\n-Name output file(s) format\n-=========== ========================== =======\n-=========== ========================== =======\n-\n-\n-----------\n-Input file\n-----------\n-\n-VCF file\n-\tVCF file with all SNPs\n-\n-----------\n-Parameters\n-----------\n-\n-Output file basename\n-\tPrefix for the output VCF file\n-\n-Samples\n- Samples to be analyzed. Comma separated list\n-\n-Chromosomes\n-\tChromosomes to be analyzed. Comma separated list\n-\n-Output format\n-\tVCF/freq/plink\n-\n-Minimum MAF\n-\tMinimum frequency\n-\n-Maximum MAF\n-\tMaximum frequency\n-\n-Missing data proportion\n-\tAllowed missing data proportion per site. 
Must be comprised between 0 and 1.\n-\n-Number of alleles\n-\tAccepted number of alleles min and max.\n-\n-Polymorphisms\n-\tType of polymorphisms to keep (ALL/SNP/INDEL).\n-Bounds\n-\tLower bound and upper bound for a range of sites to be processed.\n-\n-------------\n-Output files\n-------------\n-\n-VCF file\n-\tVCF file filtered \n-\n-Log file\n-\n----------------------------------------------------\n-\n----------------\n-Working example\n----------------\n-\n-Input files\n-===========\n-\n-VCF file\n----------\n-\n-::\n-\n-\t#fileformat=VCFv4.1\n-\t#FILTER=<ID=LowQual,Description="Low quality">\n-\t#FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n-\t[...]\n-\tCHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tCATB1\n-\tchr1\t2209\t.\tG\tT\t213.84\t.\tAC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)\tGT:AD:DP:GQ:PL\t1/1:0,7:7:18:242,18,0\n-\n-\n-Parameters\n-==========\n-\n-Output name -> filtered_chr1\n-\n-Chromosomes -> chr1\n-\n-Output format -> VCF\n-\n-Minimum MAF -> 0.001\n-\n-Maximum MAF -> 0.5\n-\n-Missing data proportion -> 1\n-\n-Number of alleles min -> 2\n-\n-Number of alleles max -> 4\n-\n-Polymorphisms -> All\n-\n-Lower bound -> 1\n-\n-Upper bound -> 100000000\n-\n-\n-Output files\n-============\n-\n-filtered_genelist_intron.vcf\n-----------------------------\n-\n-::\n-\n- #fileformat=VCFv4.1\n- #FILTER=<ID=LowQual,Description="Low quality">\n- #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n- [...]\n- CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 
CATB1\n-\tchr1\t5059\t.\tC\tG\t146.84\t.\tAC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)\tGT:AD:DP:GQ:PL\t1/1:0,8:8:18:175,18,0\n-\n-\n- </help>\n- \n-</tool>\n' |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolsSlidingWindow/VCFToolsSlidingWindow.pl --- a/vcftools_main/VCFToolsSlidingWindow/VCFToolsSlidingWindow.pl Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,122 +0,0 @@ - -#!/usr/bin/perl - -use strict; -use Getopt::Long; - -my $usage = qq~Usage:$0 <args> [<opts>] - -where <args> are: - - -i, --input <VCF input> - -o, --out <output basename> - -s, --step <step size> - -w, --window <window size> - -<opts> are: - -g, --group <group file> -~; -$usage .= "\n"; - -my ($input,$out); -my $window = 200000; -my $step = 200000; -my $groupfile; -GetOptions( - "input=s" => \$input, - "out=s" => \$out, - "step=s" => \$step, - "window=s" => \$window, - "group=s" => \$groupfile -); - - - - -die $usage - if ( !$input); - -my %hash; -my $cmd_part = ""; -if ($groupfile && -e $groupfile) -{ - open(my $G,$groupfile) or die "Cannot open $groupfile: $!"; - while(<$G>) - { - my $line = $_; - chomp($line); - $line=~s/\r//g; - $line=~s/\n//g; - my @infos = split(/;/,$line); - if ($infos[0] && $infos[1]) - { - $hash{$infos[1]} .= " --indv " . $infos[0]; - $cmd_part .= " --indv " . $infos[0]; - } - } - close($G); -} - - -if ($step =~/^(\d+)\s*$/){ - $step = $1; -} -else{ - die "Error: step size must be an integer\n"; -} -if ($window =~/^(\d+)\s*$/){ - $window = $1; -} -else{ - die "Error: window size must be an integer\n"; -} - - -my $VCFTOOLS_EXE = "vcftools"; - -system("vcf-sort $input >$input.sorted"); - - - -system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --window-pi $window --window-pi-step $step >>$out.vcftools.log 2>&1"); -system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --TajimaD $window >>$out.vcftools.log 2>&1"); -system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --TsTv $window >>$out.vcftools.log 2>&1"); -system("$VCFTOOLS_EXE --vcf $input.sorted --out $out --SNPdensity $window >>$out.vcftools.log 2>&1"); - -if (keys(%hash) > 0) -{ - my $files_pi = ""; - my $files_dtajima = ""; - foreach my $pop(sort(keys(%hash))) - { - my $cmd_part = $hash{$pop}; - system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --window-pi $window --window-pi-step $step $cmd_part --maf 0.001 >>$out.vcftools.log 
2>&1"); - my $sed_cmd = "sed -i \"s\/PI\/$pop\/g\" $out.$pop.windowed.pi"; - system($sed_cmd); - $files_pi .= "$out.$pop.windowed.pi "; - - system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --SNPdensity $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1"); - - system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --TajimaD $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1"); - my $sed_cmd = "sed -i \"s\/TajimaD\/$pop\/g\" $out.$pop.Tajima.D"; - system($sed_cmd); - $sed_cmd = "sed -i \"s/nan/0/g\" $out.Tajima.D"; - system($sed_cmd); - $files_dtajima .= "$out.$pop.Tajima.D "; - - system("$VCFTOOLS_EXE --vcf $input.sorted --remove-filtered-all --out $out.$pop --TsTv $window $cmd_part --maf 0.001 >>$out.vcftools.log 2>&1"); - } - system("paste $files_pi >>$out.combined.pi"); - my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$5\"\t\"\$10'} $out.combined.pi >$out.combined.pi.txt"; - system($awk_cmd); - - system("paste $files_dtajima >>$out.combined.dtajima"); - my $awk_cmd = "awk {'print \$1\"\t\"\$2\"\t\"\$4\"\t\"\$8'} $out.combined.dtajima >$out.combined.dtajima.txt"; - system($awk_cmd); -} - - - - - - |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolsSlidingWindow/vcfToolsSlidingWindow.xml --- a/vcftools_main/VCFToolsSlidingWindow/vcfToolsSlidingWindow.xml Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,210 +0,0 @@ -<tool id="sniplay_vcftoolsslidingwindow" name="VCF tools SlidingWindow" version="1.0.0"> - - <!-- [REQUIRED] Tool description displayed after the tool name --> - <description> Make diversity computation with sliding window </description> - - <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> - <requirements> - <requirement type="binary">perl</requirement> - <requirement type="package" version="0.1.12b">vcftools</requirement> - </requirements> - - <!-- [OPTIONAL] Command to be executed to get the tool's version string --> - <version_command> -<!-- - tool_binary -v ---> - </version_command> - - <!-- [REQUIRED] The command to execute --> - <command interpreter="perl"> - VCFToolsSlidingWindow.pl --input $filein --out $fileout_label --step $step --window $window && mv ${fileout_label}.vcftools.log $filelog && mv ${fileout_label}.Tajima.D ${fileout_taj} && mv ${fileout_label}.TsTv ${fileout_tstv} && mv ${fileout_label}.windowed.pi ${fileout_windowed} && mv ${fileout_label}.snpden ${fileout_snp} - </command> - - <!-- [REQUIRED] Input files and tool parameters --> - <inputs> - <param name="filein" type="data" format="vcf" optional="false" label="VCF input" /> - <param name="fileout_label" type="text" value="snp_density" optional="false" label="Output file basename"/> - <param name="window" type="integer" value="200000" optional="false" label="Window size (in bp)"/> - <param name="step" type="integer" value="50000" optional="false" label="Step size (in bp)"/> - </inputs> - - <!-- [REQUIRED] Output files --> - <outputs> - <data name="fileout_snp" format="tabular" label="${fileout_label}.snpden" /> - <data name="fileout_taj" format="tabular" label="${fileout_label}.Tajima.D" /> - <data name="fileout_tstv" format="tabular" label="${fileout_label}.TsTv" /> - <data name="fileout_windowed" format="tabular" label="${fileout_label}.windowed.pi" /> - <data name="filelog" format="txt" label="${fileout_label}.log" /> - </outputs> 
- - <!-- [STRONGLY RECOMMANDED] Exit code rules --> - <stdio> - <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> - <exit_code range="1:" level="fatal" /> - </stdio> - - <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> - <tests> - <!-- [HELP] Test files have to be in the ~/test-data directory --> - <test> - <param name="filein" value="sample.vcf" /> - <output name="fileout_annot" file="result.annotation" /> - <output name="fileout_het" file="result.het" /> - <output name="fileout_imiss" file="result.imiss" /> - <output name="fileout_sum" file="result.TsTv.summary" /> - <output name="filelog" file="result.log" /> - </test> - </tests> - - <!-- [OPTIONAL] Help displayed in Galaxy --> - <help> - -.. class:: infomark - -**Authors** Adam Auton, Petr Danecek and Anthony Marcketta (C++ Module) : .. _VCFtools: http://vcftools.sourceforge.net - - | ** Please cite ** "The Variant Call Format and VCFtools", Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, ** Bioinformatics **, 2011 - -.. class:: infomark - -**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. - -.. class:: infomark - -**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr - ---------------------------------------------------- - - -================ -VCF tools filter -================ - ------------ -Description ------------ - - | Compute statistics on VCF file - | For further informations on VCFtools, please visite the VCFtools website. - | .. 
_VCFtools: http://vcftools.sourceforge.net - ------------------ -Workflow position ------------------ - -**Upstream tools** - -=========== ========================== ======= -Name output file(s) format -=========== ========================== ======= -=========== ========================== ======= - - -**Downstream tools** - -=========== ========================== ======= -Name output file(s) format -=========== ========================== ======= -=========== ========================== ======= - - ----------- -Input file ----------- - -VCF file - VCF file with all SNPs - ----------- -Parameters ----------- - -Output file basename - Prefix for the output VCF file - ------------- -Output files ------------- - -.snpden file - SNP density along chromosomes (number of variants) - -.Tajima.D file - Tajima's D statistics - -.TsTv file - TsTv (Transition/transversion ratio) -.windowed.pi file - Nucleotide diversity Pi - -.log file - ---------------------------------------------------- - ---------------- -Working example ---------------- - -Input files -=========== - -VCF file ---------- - -:: - - #fileformat=VCFv4.1 - #FILTER=<ID=LowQual,Description="Low quality"> - #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> - [...] - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1 - chr1 2209 . G T 213.84 . 
AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0 - - -Parameters -========== - -Output name -> snp_density - - -Output files -============ - -.snpden file ------------- - -:: - - CHROM BIN_START SNP_COUNT VARIANTS/KB - chr1 0 4955 24.775 - - -.Tajima.D file --------------- - -:: - - CHROM BIN_START N_SNPS TajimaD - chr1 0 3737 -nan - -.TsTv file ----------- - -:: - - CHROM BinStart SNP_count Ts/Tv - chr1 0 4928 1.88356 - -.windowed.pi file ------------------ - -:: - - CHROM BIN_START BIN_END N_VARIANTS PI - chr1 1 200000 3764 0.01882 - - - </help> - -</tool> |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolsStats/VCFToolsStats.pl --- a/vcftools_main/VCFToolsStats/VCFToolsStats.pl Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,71 +0,0 @@ - -#!/usr/bin/perl - -use strict; -use Switch; -use Getopt::Long; -use Bio::SeqIO; - -my $usage = qq~Usage:$0 <args> [<opts>] - -where <args> are: - - -i, --input <VCF input> - -o, --out <output basename> -~; -$usage .= "\n"; - -my ($input,$out); - -GetOptions( - "input=s" => \$input, - "out=s" => \$out -); - - -die $usage - if ( !$input); - - - -my $nb_gene = `grep -c mRNA $input`; -$nb_gene =~s/\n//g; -my $nb_intergenic = `grep -c INTERGENIC $input`; -$nb_intergenic =~s/\n//g; - -my $nb_intron = `grep -c INTRON $input`; -$nb_intron =~s/\n//g; -my $nb_UTR = `grep -c UTR $input`; -$nb_UTR =~s/\n//g; -my $nb_exon = $nb_gene - $nb_intron - $nb_UTR; - -my $nb_ns = `grep -c NON_SYNONYMOUS_CODING $input`; -$nb_ns =~s/\n//g; -my $nb_s = $nb_exon - $nb_ns; - - - - -#system("$VCFTOOLS_EXE --vcf $input --remove-filtered-all --out $out --hardy >>vcftools.log 2>&1"); -system("vcftools --vcf $input --remove-filtered-all --out $out --het >>vcftools.log 2>&1"); -system("vcftools --vcf $input --remove-filtered-all --out $out --TsTv-summary >>vcftools.log 2>&1"); -system("vcftools --vcf $input --remove-filtered-all --out $out --missing-indv >>vcftools.log 2>&1"); - -open(my $G,">$out.annotation"); -print $G "Genic $nb_gene\n"; -print $G "Intergenic $nb_intergenic\n"; -print $G "========\n"; -print $G "Intron $nb_intron\n"; -print $G "Exon $nb_exon\n"; -print $G "UTR $nb_UTR\n"; -print $G "========\n"; -print $G "Non-syn $nb_ns\n"; -print $G "Synonym $nb_s\n"; -close($G); - - - - - - - |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolsStats/test-data/result.TsTv.summary --- a/vcftools_main/VCFToolsStats/test-data/result.TsTv.summary Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,9 +0,0 @@ -MODEL COUNT -AC 371 -AG 1467 -AT 562 -CG 330 -CT 1659 -GT 397 -Ts 3126 -Tv 1660 |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolsStats/test-data/result.annotation --- a/vcftools_main/VCFToolsStats/test-data/result.annotation Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,9 +0,0 @@ -Genic 4489 -Intergenic 466 -======== -Intron 960 -Exon 3248 -UTR 281 -======== -Non-syn 226 -Synonym 3022 |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolsStats/test-data/result.het --- a/vcftools_main/VCFToolsStats/test-data/result.het Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ -INDV O(HOM) E(HOM) N_SITES F -CATB1 0 0.0 3616 0.00000 |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolsStats/test-data/result.imiss --- a/vcftools_main/VCFToolsStats/test-data/result.imiss Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ -INDV N_DATA N_GENOTYPES_FILTERED N_MISS F_MISS -CATB1 4813 0 0 0 |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolsStats/test-data/result.log --- a/vcftools_main/VCFToolsStats/test-data/result.log Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,44 +0,0 @@ - -VCFtools - v0.1.12b -(C) Adam Auton and Anthony Marcketta 2009 - -Parameters as interpreted: - --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat - --het - --out vcf_stats - --remove-filtered-all - -After filtering, kept 1 out of 1 Individuals -Outputting Individual Heterozygosity - Individual Heterozygosity: Only using biallelic SNPs. -After filtering, kept 4813 out of a possible 4955 Sites -Run Time = 0.00 seconds - -VCFtools - v0.1.12b -(C) Adam Auton and Anthony Marcketta 2009 - -Parameters as interpreted: - --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat - --out vcf_stats - --TsTv-summary - --remove-filtered-all - -After filtering, kept 1 out of 1 Individuals -Outputting Ts/Tv summary -Ts/Tv ratio: 1.883 -After filtering, kept 4813 out of a possible 4955 Sites -Run Time = 0.00 seconds - -VCFtools - v0.1.12b -(C) Adam Auton and Anthony Marcketta 2009 - -Parameters as interpreted: - --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat - --missing-indv - --out vcf_stats - --remove-filtered-all - -After filtering, kept 1 out of 1 Individuals -Outputting Individual Missingness -After filtering, kept 4813 out of a possible 4955 Sites -Run Time = 0.00 seconds |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolsStats/test-data/sample.vcf --- a/vcftools_main/VCFToolsStats/test-data/sample.vcf Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,5000 +0,0 @@\n-##fileformat=VCFv4.1\n-##FILTER=<ID=LowQual,Description="Low quality">\n-##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n-##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n-##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n-##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n-##INFO=<ID=AF,Number=A,Type'..b'm=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n-chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n-chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n-chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n-chr1\t188357\t.\tC\tT\t1327.77\t.\tAC=2;AF=1.00;AN=2;DP=36;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=24.46;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t1/1:0,36:36:99:1356,105,0\n-chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n-chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n-chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n-chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n-chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n-chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n' |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolsStats/vcfToolsStats.sh --- a/vcftools_main/VCFToolsStats/vcfToolsStats.sh Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,23 +0,0 @@ -#!/bin/bash - -tool_path=$(dirname $0) - -filein=$1 -fileout_label=$2 -fileout_annot=$3 -fileout_het=$4 -fileout_imiss=$5 -fileout_sum=$6 -filelog=$7 - - - -perl $tool_path/VCFToolsStats.pl --input $filein --out $fileout_label - -cp $fileout_label.annotation $fileout_annot ; rm $fileout_label.annotation -cp $fileout_label.het $fileout_het ; rm $fileout_label.het -cp $fileout_label.imiss $fileout_imiss ; rm $fileout_label.imiss -cp $fileout_label.TsTv.summary $fileout_sum ; rm $fileout_label.TsTv.summary - -cp vcftools.log $filelog -rm vcftools.log |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/VCFToolsStats/vcfToolsStats.xml --- a/vcftools_main/VCFToolsStats/vcfToolsStats.xml Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,221 +0,0 @@ -<tool id="sniplay_vcftoolsstats" name="VCFtools Stats" version="1.0.0"> - - <!-- [REQUIRED] Tool description displayed after the tool name --> - <description> </description> - - <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> - <requirements> - <requirement type="binary">perl</requirement> - <requirement type="package" version="0.1.12b">vcftools</requirement> - </requirements> - - <!-- [OPTIONAL] Command to be executed to get the tool's version string --> - <version_command> -<!-- - tool_binary -v ---> - </version_command> - - <!-- [REQUIRED] The command to execute --> - <command interpreter="perl"> - vcfToolsStats.sh $filein $fileout_label $fileout_annot $fileout_het $fileout_imiss $fileout_sum $filelog - </command> - - <!-- [REQUIRED] Input files and tool parameters --> - <inputs> - <param name="filein" type="data" format="vcf" optional="false" label="VCF input" /> - <param name="fileout_label" type="text" value="vcf_stats" optional="false" label="Output file basename"/> - </inputs> - - <!-- [REQUIRED] Output files --> - <outputs> - <data name="fileout_annot" format="txt" label="${fileout_label}.annotation" /> - <data name="fileout_het" format="txt" label="${fileout_label}.het" /> - <data name="fileout_imiss" format="txt" label="${fileout_label}.imiss" /> - <data name="fileout_sum" format="txt" label="${fileout_label}.TsTv.summary" /> - <data name="filelog" format="txt" label="${fileout_label}.log" /> - </outputs> - - <!-- [STRONGLY RECOMMANDED] Exit code rules --> - <stdio> - <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> - <exit_code range="1:" level="fatal" /> - </stdio> - - <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> - <tests> - <!-- [HELP] Test files have to be in the ~/test-data directory --> - <test> - <param name="filein" value="sample.vcf" /> - <output name="fileout_annot" file="result.annotation" /> - <output 
name="fileout_het" file="result.het" /> - <output name="fileout_imiss" file="result.imiss" /> - <output name="fileout_sum" file="result.TsTv.summary" /> - <output name="filelog" file="result.log" /> - </test> - </tests> - - <!-- [OPTIONAL] Help displayed in Galaxy --> - <help> - -.. class:: infomark - -**Authors** Adam Auton, Petr Danecek and Anthony Marcketta (C++ Module) : .. _VCFtools: http://vcftools.sourceforge.net - - | ** Please cite ** "The Variant Call Format and VCFtools", Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, ** Bioinformatics **, 2011 - -.. class:: infomark - -**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. - -.. class:: infomark - -**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr - ---------------------------------------------------- - - -================ -VCF tools filter -================ - ------------ -Description ------------ - - | Compute statistics on VCF file - | For further informations on VCFtools, please visite the VCFtools website. - | .. 
_VCFtools: http://vcftools.sourceforge.net - ------------------ -Workflow position ------------------ - -**Upstream tools** - -=========== ========================== ======= -Name output file(s) format -=========== ========================== ======= -=========== ========================== ======= - - -**Downstream tools** - -=========== ========================== ======= -Name output file(s) format -=========== ========================== ======= -=========== ========================== ======= - - ----------- -Input file ----------- - -VCF file - VCF file with all SNPs - ----------- -Parameters ----------- - -Output file basename - Prefix for the output VCF file - ------------- -Output files ------------- - -.annotation file - Statistics on annotation/location along genome - -.het file - Statistics on heterozygosity of the individuals - -.imiss - Statistics on missing data of the inidividuals -.TsTv.summary - Statistics on mutation types and transition/transvertion number - -.log file - ---------------------------------------------------- - ---------------- -Working example ---------------- - -Input files -=========== - -VCF file ---------- - -:: - - #fileformat=VCFv4.1 - #FILTER=<ID=LowQual,Description="Low quality"> - #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> - [...] - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1 - chr1 2209 . G T 213.84 . 
AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0 - - -Parameters -========== - -Output name -> vcf_stat - - -Output files -============ - -.annotation file ----------------- - -:: - - Genic 4489 - Intergenic 466 - ======== - Intron 960 - Exon 3248 - UTR 281 - ======== - Non-syn 226 - Synonym 3022 - -.het file ---------- - -:: - - INDV O(HOM) E(HOM) N_SITES F - CATB1 0 0.0 3616 0.00000 - -.imiss file ------------ - -:: - - INDV N_DATA N_GENOTYPES_FILTERED N_MISS F_MISS - CATB1 4813 0 0 0 - -.TsTv.summary file ------------------- - -:: - - MODEL COUNT - AC 371 - AG 1467 - AT 562 - CG 330 - CT 1659 - GT 397 - Ts 3126 - Tv 1660 - - - </help> - -</tool> |
b |
diff -r 0f67ed444d47 -r ac7c9e40d601 vcftools_main/tool_dependencies.xml --- a/vcftools_main/tool_dependencies.xml Thu Jul 02 11:07:45 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="vcftools" version="0.1.12b"> - <repository changeset_revision="a655cb1dfc58" name="package_vcftools_0_1_12b" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu/" /> - </package> -</tool_dependency> |