annotate GenomeAnalysisTK-2.7-2-g6bda569/resources/PrintReads.java @ 0:1485d70afa12 draft default tip

Uploaded
author halley
date Tue, 15 Oct 2013 03:09:34 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1485d70afa12 Uploaded
halley
parents:
diff changeset
1 /*
1485d70afa12 Uploaded
halley
parents:
diff changeset
2 * Copyright (c) 2012 The Broad Institute
1485d70afa12 Uploaded
halley
parents:
diff changeset
3 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
4 * Permission is hereby granted, free of charge, to any person
1485d70afa12 Uploaded
halley
parents:
diff changeset
5 * obtaining a copy of this software and associated documentation
1485d70afa12 Uploaded
halley
parents:
diff changeset
6 * files (the "Software"), to deal in the Software without
1485d70afa12 Uploaded
halley
parents:
diff changeset
7 * restriction, including without limitation the rights to use,
1485d70afa12 Uploaded
halley
parents:
diff changeset
8 * copy, modify, merge, publish, distribute, sublicense, and/or sell
1485d70afa12 Uploaded
halley
parents:
diff changeset
9 * copies of the Software, and to permit persons to whom the
1485d70afa12 Uploaded
halley
parents:
diff changeset
10 * Software is furnished to do so, subject to the following
1485d70afa12 Uploaded
halley
parents:
diff changeset
11 * conditions:
1485d70afa12 Uploaded
halley
parents:
diff changeset
12 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
13 * The above copyright notice and this permission notice shall be
1485d70afa12 Uploaded
halley
parents:
diff changeset
14 * included in all copies or substantial portions of the Software.
1485d70afa12 Uploaded
halley
parents:
diff changeset
15 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1485d70afa12 Uploaded
halley
parents:
diff changeset
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
1485d70afa12 Uploaded
halley
parents:
diff changeset
18 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1485d70afa12 Uploaded
halley
parents:
diff changeset
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
1485d70afa12 Uploaded
halley
parents:
diff changeset
20 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
1485d70afa12 Uploaded
halley
parents:
diff changeset
21 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1485d70afa12 Uploaded
halley
parents:
diff changeset
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
1485d70afa12 Uploaded
halley
parents:
diff changeset
23 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1485d70afa12 Uploaded
halley
parents:
diff changeset
24 */
1485d70afa12 Uploaded
halley
parents:
diff changeset
25
1485d70afa12 Uploaded
halley
parents:
diff changeset
26 package org.broadinstitute.sting.gatk.walkers.readutils;
1485d70afa12 Uploaded
halley
parents:
diff changeset
27
1485d70afa12 Uploaded
halley
parents:
diff changeset
28 import net.sf.samtools.SAMFileWriter;
1485d70afa12 Uploaded
halley
parents:
diff changeset
29 import net.sf.samtools.SAMReadGroupRecord;
1485d70afa12 Uploaded
halley
parents:
diff changeset
30 import org.broadinstitute.sting.commandline.Argument;
1485d70afa12 Uploaded
halley
parents:
diff changeset
31 import org.broadinstitute.sting.commandline.Hidden;
1485d70afa12 Uploaded
halley
parents:
diff changeset
32 import org.broadinstitute.sting.commandline.Output;
1485d70afa12 Uploaded
halley
parents:
diff changeset
33 import org.broadinstitute.sting.gatk.CommandLineGATK;
1485d70afa12 Uploaded
halley
parents:
diff changeset
34 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
1485d70afa12 Uploaded
halley
parents:
diff changeset
35 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
1485d70afa12 Uploaded
halley
parents:
diff changeset
36 import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
1485d70afa12 Uploaded
halley
parents:
diff changeset
37 import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
1485d70afa12 Uploaded
halley
parents:
diff changeset
38 import org.broadinstitute.sting.gatk.iterators.ReadTransformersMode;
1485d70afa12 Uploaded
halley
parents:
diff changeset
39 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
1485d70afa12 Uploaded
halley
parents:
diff changeset
40 import org.broadinstitute.sting.gatk.walkers.*;
1485d70afa12 Uploaded
halley
parents:
diff changeset
41 import org.broadinstitute.sting.utils.SampleUtils;
1485d70afa12 Uploaded
halley
parents:
diff changeset
42 import org.broadinstitute.sting.utils.Utils;
1485d70afa12 Uploaded
halley
parents:
diff changeset
43 import org.broadinstitute.sting.utils.baq.BAQ;
1485d70afa12 Uploaded
halley
parents:
diff changeset
44 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
1485d70afa12 Uploaded
halley
parents:
diff changeset
45 import org.broadinstitute.sting.utils.help.HelpConstants;
1485d70afa12 Uploaded
halley
parents:
diff changeset
46 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
1485d70afa12 Uploaded
halley
parents:
diff changeset
47
1485d70afa12 Uploaded
halley
parents:
diff changeset
48 import java.io.File;
1485d70afa12 Uploaded
halley
parents:
diff changeset
49 import java.util.*;
1485d70afa12 Uploaded
halley
parents:
diff changeset
50
1485d70afa12 Uploaded
halley
parents:
diff changeset
51 /**
1485d70afa12 Uploaded
halley
parents:
diff changeset
52 * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
1485d70afa12 Uploaded
halley
parents:
diff changeset
53 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
54 * <p>
1485d70afa12 Uploaded
halley
parents:
diff changeset
55 * PrintReads can dynamically merge the contents of multiple input BAM files, resulting
1485d70afa12 Uploaded
halley
parents:
diff changeset
56 * in merged output sorted in coordinate order. Can also optionally filter reads based on the
1485d70afa12 Uploaded
halley
parents:
diff changeset
57 * --read_filter command line argument.
1485d70afa12 Uploaded
halley
parents:
diff changeset
58 * </p>
1485d70afa12 Uploaded
halley
parents:
diff changeset
59 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
60 * <p>
1485d70afa12 Uploaded
halley
parents:
diff changeset
61 * Note that when PrintReads is used as part of the Base Quality Score Recalibration workflow,
1485d70afa12 Uploaded
halley
parents:
diff changeset
62 * it takes the --BQSR engine argument, which is listed under Inherited Arguments > CommandLineGATK below.
1485d70afa12 Uploaded
halley
parents:
diff changeset
63 * </p>
1485d70afa12 Uploaded
halley
parents:
diff changeset
64 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
65 * <h3>Input</h3>
1485d70afa12 Uploaded
halley
parents:
diff changeset
66 * <p>
1485d70afa12 Uploaded
halley
parents:
diff changeset
67 * One or more bam files.
1485d70afa12 Uploaded
halley
parents:
diff changeset
68 * </p>
1485d70afa12 Uploaded
halley
parents:
diff changeset
69 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
70 * <h3>Output</h3>
1485d70afa12 Uploaded
halley
parents:
diff changeset
71 * <p>
1485d70afa12 Uploaded
halley
parents:
diff changeset
72 * A single processed bam file.
1485d70afa12 Uploaded
halley
parents:
diff changeset
73 * </p>
1485d70afa12 Uploaded
halley
parents:
diff changeset
74 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
75 * <h3>Examples</h3>
1485d70afa12 Uploaded
halley
parents:
diff changeset
76 * <pre>
1485d70afa12 Uploaded
halley
parents:
diff changeset
77 * java -Xmx2g -jar GenomeAnalysisTK.jar \
1485d70afa12 Uploaded
halley
parents:
diff changeset
78 * -R ref.fasta \
1485d70afa12 Uploaded
halley
parents:
diff changeset
79 * -T PrintReads \
1485d70afa12 Uploaded
halley
parents:
diff changeset
80 * -o output.bam \
1485d70afa12 Uploaded
halley
parents:
diff changeset
81 * -I input1.bam \
1485d70afa12 Uploaded
halley
parents:
diff changeset
82 * -I input2.bam \
1485d70afa12 Uploaded
halley
parents:
diff changeset
83 * --read_filter MappingQualityZero
1485d70afa12 Uploaded
halley
parents:
diff changeset
84 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
85 * // Prints the first 2000 reads in the BAM file
1485d70afa12 Uploaded
halley
parents:
diff changeset
86 * java -Xmx2g -jar GenomeAnalysisTK.jar \
1485d70afa12 Uploaded
halley
parents:
diff changeset
87 * -R ref.fasta \
1485d70afa12 Uploaded
halley
parents:
diff changeset
88 * -T PrintReads \
1485d70afa12 Uploaded
halley
parents:
diff changeset
89 * -o output.bam \
1485d70afa12 Uploaded
halley
parents:
diff changeset
90 * -I input.bam \
1485d70afa12 Uploaded
halley
parents:
diff changeset
91 * -n 2000
1485d70afa12 Uploaded
halley
parents:
diff changeset
92 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
93 * // Downsamples BAM file to 25%
1485d70afa12 Uploaded
halley
parents:
diff changeset
94 * java -Xmx2g -jar GenomeAnalysisTK.jar \
1485d70afa12 Uploaded
halley
parents:
diff changeset
95 * -R ref.fasta \
1485d70afa12 Uploaded
halley
parents:
diff changeset
96 * -T PrintReads \
1485d70afa12 Uploaded
halley
parents:
diff changeset
97 * -o output.bam \
1485d70afa12 Uploaded
halley
parents:
diff changeset
98 * -I input.bam \
1485d70afa12 Uploaded
halley
parents:
diff changeset
99 * -dfrac 0.25
1485d70afa12 Uploaded
halley
parents:
diff changeset
100 * </pre>
1485d70afa12 Uploaded
halley
parents:
diff changeset
101 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
102 */
1485d70afa12 Uploaded
halley
parents:
diff changeset
103 @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class} )
1485d70afa12 Uploaded
halley
parents:
diff changeset
104 @ReadTransformersMode(ApplicationTime = ReadTransformer.ApplicationTime.HANDLED_IN_WALKER)
1485d70afa12 Uploaded
halley
parents:
diff changeset
105 @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.HANDLED_IN_WALKER)
1485d70afa12 Uploaded
halley
parents:
diff changeset
106 @Requires({DataSource.READS, DataSource.REFERENCE})
1485d70afa12 Uploaded
halley
parents:
diff changeset
107 public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> implements NanoSchedulable {
1485d70afa12 Uploaded
halley
parents:
diff changeset
108
1485d70afa12 Uploaded
halley
parents:
diff changeset
109 @Output(doc="Write output to this BAM filename instead of STDOUT")
1485d70afa12 Uploaded
halley
parents:
diff changeset
110 StingSAMFileWriter out;
1485d70afa12 Uploaded
halley
parents:
diff changeset
111
1485d70afa12 Uploaded
halley
parents:
diff changeset
112 @Argument(fullName = "readGroup", shortName = "readGroup", doc="Exclude all reads with this read group from the output", required = false)
1485d70afa12 Uploaded
halley
parents:
diff changeset
113 String readGroup = null;
1485d70afa12 Uploaded
halley
parents:
diff changeset
114
1485d70afa12 Uploaded
halley
parents:
diff changeset
115 /**
1485d70afa12 Uploaded
halley
parents:
diff changeset
116 * For example, --platform ILLUMINA or --platform 454.
1485d70afa12 Uploaded
halley
parents:
diff changeset
117 */
1485d70afa12 Uploaded
halley
parents:
diff changeset
118 @Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false)
1485d70afa12 Uploaded
halley
parents:
diff changeset
119 String platform = null;
1485d70afa12 Uploaded
halley
parents:
diff changeset
120
1485d70afa12 Uploaded
halley
parents:
diff changeset
121 /**
1485d70afa12 Uploaded
halley
parents:
diff changeset
122 * Only prints the first n reads of the file
1485d70afa12 Uploaded
halley
parents:
diff changeset
123 */
1485d70afa12 Uploaded
halley
parents:
diff changeset
124 @Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
1485d70afa12 Uploaded
halley
parents:
diff changeset
125 int nReadsToPrint = -1;
1485d70afa12 Uploaded
halley
parents:
diff changeset
126
1485d70afa12 Uploaded
halley
parents:
diff changeset
127 /**
1485d70afa12 Uploaded
halley
parents:
diff changeset
128 * Only reads from samples listed in the provided file(s) will be included in the output.
1485d70afa12 Uploaded
halley
parents:
diff changeset
129 */
1485d70afa12 Uploaded
halley
parents:
diff changeset
130 @Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false)
1485d70afa12 Uploaded
halley
parents:
diff changeset
131 public Set<File> sampleFile = new TreeSet<File>();
1485d70afa12 Uploaded
halley
parents:
diff changeset
132
1485d70afa12 Uploaded
halley
parents:
diff changeset
133 /**
1485d70afa12 Uploaded
halley
parents:
diff changeset
134 * Only reads from the sample(s) will be included in the output.
1485d70afa12 Uploaded
halley
parents:
diff changeset
135 */
1485d70afa12 Uploaded
halley
parents:
diff changeset
136 @Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
1485d70afa12 Uploaded
halley
parents:
diff changeset
137 public Set<String> sampleNames = new TreeSet<String>();
1485d70afa12 Uploaded
halley
parents:
diff changeset
138
1485d70afa12 Uploaded
halley
parents:
diff changeset
139 /**
1485d70afa12 Uploaded
halley
parents:
diff changeset
140 * Erase all extra attributes in the read but keep the read group information
1485d70afa12 Uploaded
halley
parents:
diff changeset
141 */
1485d70afa12 Uploaded
halley
parents:
diff changeset
142 @Argument(fullName="simplify", shortName="s", doc="Simplify all reads.", required=false)
1485d70afa12 Uploaded
halley
parents:
diff changeset
143 public boolean simplifyReads = false;
1485d70afa12 Uploaded
halley
parents:
diff changeset
144
1485d70afa12 Uploaded
halley
parents:
diff changeset
145 @Hidden
1485d70afa12 Uploaded
halley
parents:
diff changeset
146 @Argument(fullName = "no_pg_tag", shortName = "npt", doc ="", required = false)
1485d70afa12 Uploaded
halley
parents:
diff changeset
147 public boolean NO_PG_TAG = false;
1485d70afa12 Uploaded
halley
parents:
diff changeset
148
1485d70afa12 Uploaded
halley
parents:
diff changeset
149 List<ReadTransformer> readTransformers = Collections.emptyList();
1485d70afa12 Uploaded
halley
parents:
diff changeset
150 private TreeSet<String> samplesToChoose = new TreeSet<String>();
1485d70afa12 Uploaded
halley
parents:
diff changeset
151 private boolean SAMPLES_SPECIFIED = false;
1485d70afa12 Uploaded
halley
parents:
diff changeset
152
1485d70afa12 Uploaded
halley
parents:
diff changeset
153 public static final String PROGRAM_RECORD_NAME = "GATK PrintReads"; // The name that will go in the @PG tag
1485d70afa12 Uploaded
halley
parents:
diff changeset
154
1485d70afa12 Uploaded
halley
parents:
diff changeset
155 Random random;
1485d70afa12 Uploaded
halley
parents:
diff changeset
156
1485d70afa12 Uploaded
halley
parents:
diff changeset
157
1485d70afa12 Uploaded
halley
parents:
diff changeset
158 /**
1485d70afa12 Uploaded
halley
parents:
diff changeset
159 * The initialize function.
1485d70afa12 Uploaded
halley
parents:
diff changeset
160 */
1485d70afa12 Uploaded
halley
parents:
diff changeset
161 public void initialize() {
1485d70afa12 Uploaded
halley
parents:
diff changeset
162 final GenomeAnalysisEngine toolkit = getToolkit();
1485d70afa12 Uploaded
halley
parents:
diff changeset
163
1485d70afa12 Uploaded
halley
parents:
diff changeset
164 if ( platform != null )
1485d70afa12 Uploaded
halley
parents:
diff changeset
165 platform = platform.toUpperCase();
1485d70afa12 Uploaded
halley
parents:
diff changeset
166
1485d70afa12 Uploaded
halley
parents:
diff changeset
167 if ( getToolkit() != null )
1485d70afa12 Uploaded
halley
parents:
diff changeset
168 readTransformers = getToolkit().getReadTransformers();
1485d70afa12 Uploaded
halley
parents:
diff changeset
169
1485d70afa12 Uploaded
halley
parents:
diff changeset
170 Collection<String> samplesFromFile;
1485d70afa12 Uploaded
halley
parents:
diff changeset
171 if (!sampleFile.isEmpty()) {
1485d70afa12 Uploaded
halley
parents:
diff changeset
172 samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFile);
1485d70afa12 Uploaded
halley
parents:
diff changeset
173 samplesToChoose.addAll(samplesFromFile);
1485d70afa12 Uploaded
halley
parents:
diff changeset
174 }
1485d70afa12 Uploaded
halley
parents:
diff changeset
175
1485d70afa12 Uploaded
halley
parents:
diff changeset
176 if (!sampleNames.isEmpty())
1485d70afa12 Uploaded
halley
parents:
diff changeset
177 samplesToChoose.addAll(sampleNames);
1485d70afa12 Uploaded
halley
parents:
diff changeset
178
1485d70afa12 Uploaded
halley
parents:
diff changeset
179 if(!samplesToChoose.isEmpty()) {
1485d70afa12 Uploaded
halley
parents:
diff changeset
180 SAMPLES_SPECIFIED = true;
1485d70afa12 Uploaded
halley
parents:
diff changeset
181 }
1485d70afa12 Uploaded
halley
parents:
diff changeset
182
1485d70afa12 Uploaded
halley
parents:
diff changeset
183 random = GenomeAnalysisEngine.getRandomGenerator();
1485d70afa12 Uploaded
halley
parents:
diff changeset
184
1485d70afa12 Uploaded
halley
parents:
diff changeset
185 final boolean preSorted = true;
1485d70afa12 Uploaded
halley
parents:
diff changeset
186 if (getToolkit() != null && getToolkit().getArguments().BQSR_RECAL_FILE != null && !NO_PG_TAG ) {
1485d70afa12 Uploaded
halley
parents:
diff changeset
187 Utils.setupWriter(out, toolkit, toolkit.getSAMFileHeader(), preSorted, this, PROGRAM_RECORD_NAME);
1485d70afa12 Uploaded
halley
parents:
diff changeset
188 }
1485d70afa12 Uploaded
halley
parents:
diff changeset
189
1485d70afa12 Uploaded
halley
parents:
diff changeset
190 }
1485d70afa12 Uploaded
halley
parents:
diff changeset
191
1485d70afa12 Uploaded
halley
parents:
diff changeset
192 /**
1485d70afa12 Uploaded
halley
parents:
diff changeset
193 * The reads filter function.
1485d70afa12 Uploaded
halley
parents:
diff changeset
194 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
195 * @param ref the reference bases that correspond to our read, if a reference was provided
1485d70afa12 Uploaded
halley
parents:
diff changeset
196 * @param read the read itself, as a GATKSAMRecord
1485d70afa12 Uploaded
halley
parents:
diff changeset
197 * @return true if the read passes the filter, false if it doesn't
1485d70afa12 Uploaded
halley
parents:
diff changeset
198 */
1485d70afa12 Uploaded
halley
parents:
diff changeset
199 public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
1485d70afa12 Uploaded
halley
parents:
diff changeset
200 // check the read group
1485d70afa12 Uploaded
halley
parents:
diff changeset
201 if ( readGroup != null ) {
1485d70afa12 Uploaded
halley
parents:
diff changeset
202 SAMReadGroupRecord myReadGroup = read.getReadGroup();
1485d70afa12 Uploaded
halley
parents:
diff changeset
203 if ( myReadGroup == null || !readGroup.equals(myReadGroup.getReadGroupId()) )
1485d70afa12 Uploaded
halley
parents:
diff changeset
204 return false;
1485d70afa12 Uploaded
halley
parents:
diff changeset
205 }
1485d70afa12 Uploaded
halley
parents:
diff changeset
206
1485d70afa12 Uploaded
halley
parents:
diff changeset
207 // check the platform
1485d70afa12 Uploaded
halley
parents:
diff changeset
208 if ( platform != null ) {
1485d70afa12 Uploaded
halley
parents:
diff changeset
209 SAMReadGroupRecord readGroup = read.getReadGroup();
1485d70afa12 Uploaded
halley
parents:
diff changeset
210 if ( readGroup == null )
1485d70afa12 Uploaded
halley
parents:
diff changeset
211 return false;
1485d70afa12 Uploaded
halley
parents:
diff changeset
212
1485d70afa12 Uploaded
halley
parents:
diff changeset
213 Object readPlatformAttr = readGroup.getAttribute("PL");
1485d70afa12 Uploaded
halley
parents:
diff changeset
214 if ( readPlatformAttr == null || !readPlatformAttr.toString().toUpperCase().contains(platform))
1485d70afa12 Uploaded
halley
parents:
diff changeset
215 return false;
1485d70afa12 Uploaded
halley
parents:
diff changeset
216 }
1485d70afa12 Uploaded
halley
parents:
diff changeset
217 if (SAMPLES_SPECIFIED ) {
1485d70afa12 Uploaded
halley
parents:
diff changeset
218 // user specified samples to select
1485d70afa12 Uploaded
halley
parents:
diff changeset
219 // todo - should be case-agnostic but for simplicity and speed this is ignored.
1485d70afa12 Uploaded
halley
parents:
diff changeset
220 // todo - can check at initialization intersection of requested samples and samples in BAM header to further speedup.
1485d70afa12 Uploaded
halley
parents:
diff changeset
221 if (!samplesToChoose.contains(read.getReadGroup().getSample()))
1485d70afa12 Uploaded
halley
parents:
diff changeset
222 return false;
1485d70afa12 Uploaded
halley
parents:
diff changeset
223 }
1485d70afa12 Uploaded
halley
parents:
diff changeset
224
1485d70afa12 Uploaded
halley
parents:
diff changeset
225
1485d70afa12 Uploaded
halley
parents:
diff changeset
226 // check if we've reached the output limit
1485d70afa12 Uploaded
halley
parents:
diff changeset
227 if ( nReadsToPrint == 0 ) {
1485d70afa12 Uploaded
halley
parents:
diff changeset
228 return false; // n == 0 means we've printed all we needed.
1485d70afa12 Uploaded
halley
parents:
diff changeset
229 }
1485d70afa12 Uploaded
halley
parents:
diff changeset
230 else if (nReadsToPrint > 0) {
1485d70afa12 Uploaded
halley
parents:
diff changeset
231 nReadsToPrint--; // n > 0 means there are still reads to be printed.
1485d70afa12 Uploaded
halley
parents:
diff changeset
232 }
1485d70afa12 Uploaded
halley
parents:
diff changeset
233
1485d70afa12 Uploaded
halley
parents:
diff changeset
234 return true;
1485d70afa12 Uploaded
halley
parents:
diff changeset
235 }
1485d70afa12 Uploaded
halley
parents:
diff changeset
236
1485d70afa12 Uploaded
halley
parents:
diff changeset
237 /**
1485d70afa12 Uploaded
halley
parents:
diff changeset
238 * The reads map function.
1485d70afa12 Uploaded
halley
parents:
diff changeset
239 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
240 * @param ref the reference bases that correspond to our read, if a reference was provided
1485d70afa12 Uploaded
halley
parents:
diff changeset
241 * @param readIn the read itself, as a GATKSAMRecord
1485d70afa12 Uploaded
halley
parents:
diff changeset
242 * @return the read itself
1485d70afa12 Uploaded
halley
parents:
diff changeset
243 */
1485d70afa12 Uploaded
halley
parents:
diff changeset
244 public GATKSAMRecord map( ReferenceContext ref, GATKSAMRecord readIn, RefMetaDataTracker metaDataTracker ) {
1485d70afa12 Uploaded
halley
parents:
diff changeset
245 GATKSAMRecord workingRead = readIn;
1485d70afa12 Uploaded
halley
parents:
diff changeset
246
1485d70afa12 Uploaded
halley
parents:
diff changeset
247 for ( final ReadTransformer transformer : readTransformers ) {
1485d70afa12 Uploaded
halley
parents:
diff changeset
248 workingRead = transformer.apply(workingRead);
1485d70afa12 Uploaded
halley
parents:
diff changeset
249 }
1485d70afa12 Uploaded
halley
parents:
diff changeset
250
1485d70afa12 Uploaded
halley
parents:
diff changeset
251 if ( simplifyReads ) workingRead = workingRead.simplify();
1485d70afa12 Uploaded
halley
parents:
diff changeset
252
1485d70afa12 Uploaded
halley
parents:
diff changeset
253 return workingRead;
1485d70afa12 Uploaded
halley
parents:
diff changeset
254 }
1485d70afa12 Uploaded
halley
parents:
diff changeset
255
1485d70afa12 Uploaded
halley
parents:
diff changeset
256 /**
1485d70afa12 Uploaded
halley
parents:
diff changeset
257 * reduceInit is called once before any calls to the map function. We use it here to setup the output
1485d70afa12 Uploaded
halley
parents:
diff changeset
258 * bam file, if it was specified on the command line
1485d70afa12 Uploaded
halley
parents:
diff changeset
259 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
260 * @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise
1485d70afa12 Uploaded
halley
parents:
diff changeset
261 */
1485d70afa12 Uploaded
halley
parents:
diff changeset
262 public SAMFileWriter reduceInit() {
1485d70afa12 Uploaded
halley
parents:
diff changeset
263 return out;
1485d70afa12 Uploaded
halley
parents:
diff changeset
264 }
1485d70afa12 Uploaded
halley
parents:
diff changeset
265
1485d70afa12 Uploaded
halley
parents:
diff changeset
266 /**
1485d70afa12 Uploaded
halley
parents:
diff changeset
267 * given a read and a output location, reduce by emitting the read
1485d70afa12 Uploaded
halley
parents:
diff changeset
268 *
1485d70afa12 Uploaded
halley
parents:
diff changeset
269 * @param read the read itself
1485d70afa12 Uploaded
halley
parents:
diff changeset
270 * @param output the output source
1485d70afa12 Uploaded
halley
parents:
diff changeset
271 * @return the SAMFileWriter, so that the next reduce can emit to the same source
1485d70afa12 Uploaded
halley
parents:
diff changeset
272 */
1485d70afa12 Uploaded
halley
parents:
diff changeset
273 public SAMFileWriter reduce( GATKSAMRecord read, SAMFileWriter output ) {
1485d70afa12 Uploaded
halley
parents:
diff changeset
274 output.addAlignment(read);
1485d70afa12 Uploaded
halley
parents:
diff changeset
275 return output;
1485d70afa12 Uploaded
halley
parents:
diff changeset
276 }
1485d70afa12 Uploaded
halley
parents:
diff changeset
277 }