Mercurial > repos > halley > gatk_2_7
view GenomeAnalysisTK-2.7-2-g6bda569/resources/CheckPileup.java @ 0:1485d70afa12 draft default tip
Uploaded
author | halley |
---|---|
date | Tue, 15 Oct 2013 03:09:34 -0400 |
parents | |
children |
line wrap: on
line source
/* * Copyright (c) 2012 The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package org.broadinstitute.sting.gatk.walkers.qc; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.codecs.sampileup.SAMPileupFeature; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.HelpConstants; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.io.PrintStream; import java.util.Arrays; /** * At every locus in the input set, compares the pileup data (reference base, aligned base from * each overlapping read, and quality score) to the reference pileup data generated by samtools. Samtools' pileup data * should be specified using the command-line argument '-pileup:SAMPileup <your sam pileup file>'. */ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_QC, extraDocs = {CommandLineGATK.class} ) @Requires(value={DataSource.READS,DataSource.REFERENCE}) public class CheckPileup extends LocusWalker<Integer, CheckPileupStats> implements TreeReducible<CheckPileupStats> { @Input(fullName = "pileup", doc="The SAMPileup containing the expected output", required = true) RodBinding<SAMPileupFeature> pileup; @Output private PrintStream out; @Argument(fullName="continue_after_error",doc="Continue after an error",required=false) public boolean CONTINUE_AFTER_AN_ERROR = false; public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { ReadBackedPileup pileup = context.getBasePileup(); SAMPileupFeature truePileup = getTruePileup( tracker ); if ( truePileup == null ) { out.printf("No truth pileup data available at %s%n", pileup.getPileupString(ref.getBaseAsChar())); if ( ! CONTINUE_AFTER_AN_ERROR ) { throw new UserException.CommandLineException(String.format("No pileup data available at %s given GATK's output of %s -- this walker requires samtools pileup data over all bases", context.getLocation(), new String(pileup.getBases()))); } } else { String pileupDiff = pileupDiff(pileup, truePileup, true); if ( pileupDiff != null ) { out.printf("%s vs. %s%n", pileup.getPileupString(ref.getBaseAsChar()), truePileup.getPileupString()); if ( ! CONTINUE_AFTER_AN_ERROR ) { throw new RuntimeException(String.format("Pileups aren't equal: %s", pileupDiff)); } } } return pileup.getNumberOfElements(); } private static String maybeSorted( final String x, boolean sortMe ) { if ( sortMe ) { byte[] bytes = x.getBytes(); Arrays.sort(bytes); return new String(bytes); } else return x; } public String pileupDiff(final ReadBackedPileup a, final SAMPileupFeature b, boolean orderDependent) { if ( a.getNumberOfElements() != b.size() ) return "Sizes not equal"; GenomeLoc featureLocation = getToolkit().getGenomeLocParser().createGenomeLoc(b.getChr(),b.getStart(),b.getEnd()); if ( a.getLocation().compareTo(featureLocation) != 0 ) return "Locations not equal"; String aBases = maybeSorted(new String(a.getBases()), ! orderDependent ); String bBases = maybeSorted(b.getBasesAsString(), ! orderDependent ); if ( ! aBases.toUpperCase().equals(bBases.toUpperCase()) ) return "Bases not equal"; String aQuals = maybeSorted(new String(a.getQuals()), ! orderDependent ); String bQuals = maybeSorted(new String(b.getQuals()), ! orderDependent ); if ( ! aQuals.equals(bQuals) ) return "Quals not equal"; return null; } // Given result of map function public CheckPileupStats reduceInit() { return new CheckPileupStats(); } public CheckPileupStats reduce(Integer value, CheckPileupStats sum) { sum.nLoci++; sum.nBases += value; return sum; } public CheckPileupStats treeReduce( CheckPileupStats lhs, CheckPileupStats rhs ) { CheckPileupStats combined = new CheckPileupStats(); combined.nLoci = lhs.nLoci + rhs.nLoci; combined.nBases = lhs.nBases + rhs.nBases; return combined; } /** * Extracts the true pileup data from the given rodSAMPileup. Note that this implementation * assumes that the genotype will only be point or indel. * @param tracker ROD tracker from which to extract pileup data. * @return True pileup data. */ private SAMPileupFeature getTruePileup( RefMetaDataTracker tracker ) { SAMPileupFeature pileupArg = tracker.getFirstValue(pileup); if( pileupArg == null) return null; if( pileupArg.hasPointGenotype() ) return pileupArg.getPointGenotype(); else if( pileupArg.hasIndelGenotype() ) return pileupArg.getIndelGenotype(); else throw new ReviewedStingException("Unsupported pileup type: " + pileupArg); } } class CheckPileupStats { public long nLoci = 0; public long nBases = 0; public CheckPileupStats() { } public String toString() { return String.format("Validated %d sites covered by %d bases%n", nLoci, nBases); } }